/*
   Provides an interface to the SuperLU_DIST sparse solver
*/

#include <../src/mat/impls/aij/seq/aij.h>
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscpkg_version.h>

EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
#define CASTDOUBLECOMPLEX (doublecomplex*)
#include <superlu_zdefs.h>
#define LUstructInit zLUstructInit
#define ScalePermstructInit zScalePermstructInit
#define ScalePermstructFree zScalePermstructFree
#define LUstructFree zLUstructFree
#define Destroy_LU zDestroy_LU
#define ScalePermstruct_t zScalePermstruct_t
#define LUstruct_t zLUstruct_t
#define SOLVEstruct_t zSOLVEstruct_t
#define SolveFinalize zSolveFinalize
#define pgssvx pzgssvx
#define Create_CompRowLoc_Matrix_dist zCreate_CompRowLoc_Matrix_dist
#define pGetDiagU pzGetDiagU
#define allocateA_dist zallocateA_dist
#define SLU_ZD SLU_Z
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0)
#define DeAllocLlu_3d zDeAllocLlu_3d
#define DeAllocGlu_3d zDeAllocGlu_3d
#define Destroy_A3d_gathered_on_2d zDestroy_A3d_gathered_on_2d
#define pgssvx3d pzgssvx3d
#endif
#else
#define CASTDOUBLECOMPLEX
#include <superlu_ddefs.h>
#define LUstructInit dLUstructInit
#define ScalePermstructInit dScalePermstructInit
#define ScalePermstructFree dScalePermstructFree
#define LUstructFree dLUstructFree
#define Destroy_LU dDestroy_LU
#define ScalePermstruct_t dScalePermstruct_t
#define LUstruct_t dLUstruct_t
#define SOLVEstruct_t dSOLVEstruct_t
#define SolveFinalize dSolveFinalize
#define pgssvx pdgssvx
#define Create_CompRowLoc_Matrix_dist dCreate_CompRowLoc_Matrix_dist
#define pGetDiagU pdGetDiagU
#define allocateA_dist dallocateA_dist
#define SLU_ZD SLU_D
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0)
#define DeAllocLlu_3d dDeAllocLlu_3d
#define DeAllocGlu_3d dDeAllocGlu_3d
#define Destroy_A3d_gathered_on_2d dDestroy_A3d_gathered_on_2d
#define pgssvx3d pdgssvx3d
#endif
#endif
EXTERN_C_END

typedef struct {
  int_t                  nprow,npcol,*row,*col;
  gridinfo_t             grid;
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0)
  PetscBool              use3d;
  int_t                  npdep; /* replication factor, must be power of two */
  gridinfo3d_t           grid3d;
#endif
  superlu_dist_options_t options;
  SuperMatrix            A_sup;
  ScalePermstruct_t      ScalePermstruct;
  LUstruct_t             LUstruct;
  int                    StatPrint;
  SOLVEstruct_t          SOLVEstruct;
  fact_t                 FactPattern;
  MPI_Comm               comm_superlu;
#if defined(PETSC_USE_COMPLEX)
  doublecomplex          *val;
#else
  double                 *val;
#endif
  PetscBool              matsolve_iscalled,matmatsolve_iscalled;
  PetscBool              CleanUpSuperLU_Dist; /* Flag to clean up (non-global) SuperLU objects during Destroy */
} Mat_SuperLU_DIST;

PetscErrorCode MatSuperluDistGetDiagU_SuperLU_DIST(Mat F,PetscScalar *diagU)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)F->data;

  PetscFunctionBegin;
  PetscStackCall("SuperLU_DIST:pGetDiagU",pGetDiagU(F->rmap->N,&lu->LUstruct,&lu->grid,CASTDOUBLECOMPLEX diagU));
  PetscFunctionReturn(0);
}

PetscErrorCode MatSuperluDistGetDiagU(Mat F,PetscScalar *diagU)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(F,MAT_CLASSID,1);
  ierr = PetscTryMethod(F,"MatSuperluDistGetDiagU_C",(Mat,PetscScalar*),(F,diagU));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
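/*
   Example use of MatSuperluDistGetDiagU() above (a minimal sketch, not part of this
   file's interface; F is assumed to be a factored matrix obtained, e.g., with
   PCFactorGetMatrix(), and diagU receives the global diagonal of U on every rank):

     PetscInt    N;
     PetscScalar *diagU;

     ierr = MatGetSize(F,&N,NULL);CHKERRQ(ierr);
     ierr = PetscMalloc1(N,&diagU);CHKERRQ(ierr);
     ierr = MatSuperluDistGetDiagU(F,diagU);CHKERRQ(ierr);
     ...
     ierr = PetscFree(diagU);CHKERRQ(ierr);
*/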
/* This allows reusing the SuperLU_DIST communicator and grid when only a single SuperLU_DIST matrix is used at a time */
typedef struct {
  MPI_Comm     comm;
  PetscBool    busy;
  gridinfo_t   grid;
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0)
  PetscBool    use3d;
  gridinfo3d_t grid3d;
#endif
} PetscSuperLU_DIST;

static PetscMPIInt Petsc_Superlu_dist_keyval = MPI_KEYVAL_INVALID;

PETSC_EXTERN PetscMPIInt MPIAPI Petsc_Superlu_dist_keyval_Delete_Fn(MPI_Comm comm,PetscMPIInt keyval,void *attr_val,void *extra_state)
{
  PetscErrorCode    ierr;
  PetscSuperLU_DIST *context = (PetscSuperLU_DIST*)attr_val;

  PetscFunctionBegin;
  if (keyval != Petsc_Superlu_dist_keyval) SETERRMPI(PETSC_COMM_SELF,PETSC_ERR_ARG_CORRUPT,"Unexpected keyval");
  ierr = PetscInfo(NULL,"Removing Petsc_Superlu_dist_keyval attribute from communicator that is being freed\n");CHKERRQ(ierr);
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0)
  if (context->use3d) {
    PetscStackCall("SuperLU_DIST:superlu_gridexit3d",superlu_gridexit3d(&context->grid3d));
  } else
#endif
  PetscStackCall("SuperLU_DIST:superlu_gridexit",superlu_gridexit(&context->grid));
  ierr = MPI_Comm_free(&context->comm);CHKERRMPI(ierr);
  ierr = PetscFree(context);CHKERRQ(ierr);
  PetscFunctionReturn(MPI_SUCCESS);
}

/*
   Performs MPI_Comm_free_keyval() on Petsc_Superlu_dist_keyval but keeps the global variable for
   users who do not destroy all PETSc objects before PetscFinalize().

   The value Petsc_Superlu_dist_keyval is retained so that Petsc_Superlu_dist_keyval_Delete_Fn()
   can still check that the keyval associated with the MPI communicator is correct when the MPI
   communicator is destroyed.

   This is called in PetscFinalize()
*/
static PetscErrorCode Petsc_Superlu_dist_keyval_free(void)
{
  PetscErrorCode ierr;
  PetscMPIInt    Petsc_Superlu_dist_keyval_temp = Petsc_Superlu_dist_keyval;

  PetscFunctionBegin;
  ierr = PetscInfo(NULL,"Freeing Petsc_Superlu_dist_keyval\n");CHKERRQ(ierr);
  ierr = MPI_Comm_free_keyval(&Petsc_Superlu_dist_keyval_temp);CHKERRMPI(ierr);
  PetscFunctionReturn(0);
}
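/*
   Summary of the caching scheme above (a descriptive note): the first factorization on a
   communicator duplicates it, builds a SuperLU_DIST process grid, and stores both in a
   PetscSuperLU_DIST attribute with busy = PETSC_TRUE. While that grid is busy, further
   factorizations fall back to a private communicator (lu->comm_superlu).
   MatDestroy_SuperLU_DIST() clears busy so the grid can be reused, and
   Petsc_Superlu_dist_keyval_Delete_Fn() releases everything when the communicator itself
   is freed.
*/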
static PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
{
  PetscErrorCode   ierr;
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->data;

  PetscFunctionBegin;
  if (lu->CleanUpSuperLU_Dist) {
    /* Deallocate SuperLU_DIST storage */
    PetscStackCall("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist",Destroy_CompRowLoc_Matrix_dist(&lu->A_sup));
    if (lu->options.SolveInitialized) {
      PetscStackCall("SuperLU_DIST:SolveFinalize",SolveFinalize(&lu->options, &lu->SOLVEstruct));
    }
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0)
    if (lu->use3d) {
      if (lu->grid3d.zscp.Iam == 0) {
        PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct));
      } else {
        PetscStackCall("SuperLU_DIST:DeAllocLlu_3d",DeAllocLlu_3d(lu->A_sup.ncol, &lu->LUstruct, &lu->grid3d));
        PetscStackCall("SuperLU_DIST:DeAllocGlu_3d",DeAllocGlu_3d(&lu->LUstruct));
      }
      PetscStackCall("SuperLU_DIST:Destroy_A3d_gathered_on_2d",Destroy_A3d_gathered_on_2d(&lu->SOLVEstruct, &lu->grid3d));
    } else
#endif
    PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct));
    PetscStackCall("SuperLU_DIST:ScalePermstructFree",ScalePermstructFree(&lu->ScalePermstruct));
    PetscStackCall("SuperLU_DIST:LUstructFree",LUstructFree(&lu->LUstruct));

    /* Release the SuperLU_DIST process grid only if the matrix has its own copy, that is, the grid is not cached in the communicator attribute */
    if (lu->comm_superlu) {
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0)
      if (lu->use3d) {
        PetscStackCall("SuperLU_DIST:superlu_gridexit3d",superlu_gridexit3d(&lu->grid3d));
      } else
#endif
      PetscStackCall("SuperLU_DIST:superlu_gridexit",superlu_gridexit(&lu->grid));
      ierr = PetscCommRestoreComm(PetscObjectComm((PetscObject)A),&lu->comm_superlu);CHKERRQ(ierr);
    } else {
      PetscSuperLU_DIST *context;
      MPI_Comm          comm;
      PetscMPIInt       flg;

      ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
      ierr = MPI_Comm_get_attr(comm,Petsc_Superlu_dist_keyval,&context,&flg);CHKERRMPI(ierr);
      PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Communicator does not have expected Petsc_Superlu_dist_keyval attribute");
      context->busy = PETSC_FALSE; /* the cached grid becomes available for the next factorization */
    }
  }
  ierr = PetscFree(A->data);CHKERRQ(ierr);
  /* clear composed functions */
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatSuperluDistGetDiagU_C",NULL);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

static PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->data;
  PetscErrorCode   ierr;
  PetscInt         m = A->rmap->n;
  SuperLUStat_t    stat;
  double           berr[1];
  PetscScalar      *bptr = NULL;
  int              info; /* SuperLU_DIST info code is ALWAYS an int, even with long long indices */
  static PetscBool cite = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCheck(lu->options.Fact == FACTORED,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact must equal FACTORED");
  ierr = PetscCitationsRegister("@article{lidemmel03,\n author = {Xiaoye S. Li and James W. Demmel},\n title = {{SuperLU_DIST}: A Scalable Distributed-Memory Sparse Direct\n Solver for Unsymmetric Linear Systems},\n journal = {ACM Trans. Mathematical Software},\n volume = {29},\n number = {2},\n pages = {110-140},\n year = 2003\n}\n",&cite);CHKERRQ(ierr);

  if (lu->options.SolveInitialized && !lu->matsolve_iscalled) {
    /* see comments in MatMatSolve() */
    PetscStackCall("SuperLU_DIST:SolveFinalize",SolveFinalize(&lu->options, &lu->SOLVEstruct));
    lu->options.SolveInitialized = NO;
  }
  /* pgssvx() solves in place, so copy b into x and hand x's array to SuperLU_DIST */
  ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
  ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);

  PetscStackCall("SuperLU_DIST:PStatInit",PStatInit(&stat)); /* Initialize the statistics variables. */
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0)
  if (lu->use3d)
    PetscStackCall("SuperLU_DIST:pgssvx3d",pgssvx3d(&lu->options,&lu->A_sup,&lu->ScalePermstruct,CASTDOUBLECOMPLEX bptr,m,1,&lu->grid3d,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info));
  else
#endif
  PetscStackCall("SuperLU_DIST:pgssvx",pgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,CASTDOUBLECOMPLEX bptr,m,1,&lu->grid,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info));
  PetscCheck(!info,PETSC_COMM_SELF,PETSC_ERR_LIB,"p*gssvx fails, info: %d",info);

  if (lu->options.PrintStat) PetscStackCall("SuperLU_DIST:PStatPrint",PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */
  PetscStackCall("SuperLU_DIST:PStatFree",PStatFree(&stat));

  ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
  lu->matsolve_iscalled    = PETSC_TRUE;
  lu->matmatsolve_iscalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
static PetscErrorCode MatMatSolve_SuperLU_DIST(Mat A,Mat B_mpi,Mat X)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->data;
  PetscErrorCode   ierr;
  PetscInt         m = A->rmap->n,nrhs;
  SuperLUStat_t    stat;
  double           berr[1];
  PetscScalar      *bptr;
  int              info; /* SuperLU_DIST info code is ALWAYS an int, even with long long indices */
  PetscBool        flg;

  PetscFunctionBegin;
  PetscCheck(lu->options.Fact == FACTORED,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact must equal FACTORED");
  ierr = PetscObjectTypeCompareAny((PetscObject)B_mpi,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be a MATDENSE matrix");
  if (X != B_mpi) {
    ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
    PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be a MATDENSE matrix");
  }

  if (lu->options.SolveInitialized && !lu->matmatsolve_iscalled) {
    /* the communication pattern of SOLVEstruct was likely not created for MatMatSolve(),
       thus destroy it and create a new SOLVEstruct.
       Otherwise it may result in memory corruption or an incorrect solution.
       See src/mat/tests/ex125.c */
    PetscStackCall("SuperLU_DIST:SolveFinalize",SolveFinalize(&lu->options, &lu->SOLVEstruct));
    lu->options.SolveInitialized = NO;
  }
  if (X != B_mpi) {
    ierr = MatCopy(B_mpi,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }

  ierr = MatGetSize(B_mpi,NULL,&nrhs);CHKERRQ(ierr);

  PetscStackCall("SuperLU_DIST:PStatInit",PStatInit(&stat)); /* Initialize the statistics variables. */
  ierr = MatDenseGetArray(X,&bptr);CHKERRQ(ierr);

#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0)
  if (lu->use3d)
    PetscStackCall("SuperLU_DIST:pgssvx3d",pgssvx3d(&lu->options,&lu->A_sup,&lu->ScalePermstruct,CASTDOUBLECOMPLEX bptr,m,nrhs,&lu->grid3d,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info));
  else
#endif
  PetscStackCall("SuperLU_DIST:pgssvx",pgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,CASTDOUBLECOMPLEX bptr,m,nrhs,&lu->grid,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info));

  PetscCheck(!info,PETSC_COMM_SELF,PETSC_ERR_LIB,"p*gssvx fails, info: %d",info);
  ierr = MatDenseRestoreArray(X,&bptr);CHKERRQ(ierr);

  if (lu->options.PrintStat) PetscStackCall("SuperLU_DIST:PStatPrint",PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */
  PetscStackCall("SuperLU_DIST:PStatFree",PStatFree(&stat));
  lu->matsolve_iscalled    = PETSC_FALSE;
  lu->matmatsolve_iscalled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
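/*
   Example use of the MatMatSolve() path above (a minimal sketch; F is assumed to be a
   factored matrix from PCFactorGetMatrix() and B a MATDENSE matrix whose columns are
   the right-hand sides, so MatMatSolve() solves A X = B column by column):

     Mat X;

     ierr = MatDuplicate(B,MAT_DO_NOT_COPY_VALUES,&X);CHKERRQ(ierr);
     ierr = MatMatSolve(F,B,X);CHKERRQ(ierr);
*/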
/*
  input:
   F:     numeric Cholesky factor
  output:
   nneg:  total number of negative pivots
   nzero: total number of zero pivots
   npos:  (global dimension of F) - nneg - nzero
*/
static PetscErrorCode MatGetInertia_SuperLU_DIST(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos)
{
  PetscErrorCode   ierr;
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)F->data;
  PetscScalar      *diagU = NULL;
  PetscInt         M,i,neg = 0,zero = 0,pos = 0;
  PetscReal        r;

  PetscFunctionBegin;
  PetscCheck(F->assembled,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix factor F is not assembled");
  PetscCheck(lu->options.RowPerm == NOROWPERM,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must set NOROWPERM");
  ierr = MatGetSize(F,&M,NULL);CHKERRQ(ierr);
  ierr = PetscMalloc1(M,&diagU);CHKERRQ(ierr);
  ierr = MatSuperluDistGetDiagU(F,diagU);CHKERRQ(ierr);
  for (i=0; i<M; i++) {
#if defined(PETSC_USE_COMPLEX)
    r = PetscImaginaryPart(diagU[i])/10.0;
    PetscCheck(r > -PETSC_MACHINE_EPSILON && r < PETSC_MACHINE_EPSILON,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"diagU[%" PetscInt_FMT "]=%g + i %g is non-real",i,(double)PetscRealPart(diagU[i]),(double)(r*10.0));
    r = PetscRealPart(diagU[i]);
#else
    r = diagU[i];
#endif
    if (r > 0) {
      pos++;
    } else if (r < 0) {
      neg++;
    } else zero++;
  }

  ierr = PetscFree(diagU);CHKERRQ(ierr);
  if (nneg)  *nneg  = neg;
  if (nzero) *nzero = zero;
  if (npos)  *npos  = pos;
  PetscFunctionReturn(0);
}
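/*
   Example use of the inertia computation above (a minimal sketch; by Sylvester's law of
   inertia, the signs of the pivots of a symmetric matrix give the numbers of positive,
   zero, and negative eigenvalues). The factorization must be run with
   -mat_superlu_dist_rowperm NOROWPERM, matching the PetscCheck() above:

     PetscInt nneg,nzero,npos;

     ierr = MatGetInertia(F,&nneg,&nzero,&npos);CHKERRQ(ierr);
*/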
static PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat_SuperLU_DIST  *lu = (Mat_SuperLU_DIST*)F->data;
  Mat               Aloc;
  const PetscScalar *av;
  const PetscInt    *ai = NULL,*aj = NULL;
  PetscInt          nz,dummy;
  int               sinfo; /* SuperLU_DIST info flag is always an int even with long long indices */
  SuperLUStat_t     stat;
  double            *berr = 0;
  PetscBool         ismpiaij,isseqaij,flg;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
  ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATMPIAIJ,&ismpiaij);CHKERRQ(ierr);
  if (ismpiaij) {
    ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
  } else if (isseqaij) {
    ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
    Aloc = A;
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for type %s",((PetscObject)A)->type_name);

  ierr = MatGetRowIJ(Aloc,0,PETSC_FALSE,PETSC_FALSE,&dummy,&ai,&aj,&flg);CHKERRQ(ierr);
  PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"GetRowIJ failed");
  ierr = MatSeqAIJGetArrayRead(Aloc,&av);CHKERRQ(ierr);
  nz   = ai[Aloc->rmap->n];

  /* Allocations for A_sup */
  if (lu->options.Fact == DOFACT) { /* first numeric factorization */
    PetscStackCall("SuperLU_DIST:allocateA_dist",allocateA_dist(Aloc->rmap->n, nz, &lu->val, &lu->col, &lu->row));
  } else { /* successive numeric factorization; the sparsity pattern and perm_c are reused */
    if (lu->FactPattern == SamePattern_SameRowPerm) {
      lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
    } else if (lu->FactPattern == SamePattern) {
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0)
      if (lu->use3d) {
        if (lu->grid3d.zscp.Iam == 0) {
          PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct));
          PetscStackCall("SuperLU_DIST:SolveFinalize",SolveFinalize(&lu->options, &lu->SOLVEstruct));
        } else {
          PetscStackCall("SuperLU_DIST:DeAllocLlu_3d",DeAllocLlu_3d(lu->A_sup.ncol, &lu->LUstruct, &lu->grid3d));
          PetscStackCall("SuperLU_DIST:DeAllocGlu_3d",DeAllocGlu_3d(&lu->LUstruct));
        }
      } else
#endif
      PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct));
      lu->options.Fact = SamePattern;
    } else if (lu->FactPattern == DOFACT) {
      PetscStackCall("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist",Destroy_CompRowLoc_Matrix_dist(&lu->A_sup));
      PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct));
      lu->options.Fact = DOFACT;
      PetscStackCall("SuperLU_DIST:allocateA_dist",allocateA_dist(Aloc->rmap->n, nz, &lu->val, &lu->col, &lu->row));
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"options.Fact must be one of SamePattern SamePattern_SameRowPerm DOFACT");
  }

  /* Copy AIJ matrix to superlu_dist matrix */
  ierr = PetscArraycpy(lu->row,ai,Aloc->rmap->n+1);CHKERRQ(ierr);
  ierr = PetscArraycpy(lu->col,aj,nz);CHKERRQ(ierr);
  ierr = PetscArraycpy(lu->val,av,nz);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(Aloc,0,PETSC_FALSE,PETSC_FALSE,&dummy,&ai,&aj,&flg);CHKERRQ(ierr);
  PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"RestoreRowIJ failed");
  ierr = MatSeqAIJRestoreArrayRead(Aloc,&av);CHKERRQ(ierr);
  ierr = MatDestroy(&Aloc);CHKERRQ(ierr);

  /* Create and setup A_sup */
  if (lu->options.Fact == DOFACT) {
    PetscStackCall("SuperLU_DIST:Create_CompRowLoc_Matrix_dist",Create_CompRowLoc_Matrix_dist(&lu->A_sup, A->rmap->N, A->cmap->N, nz, A->rmap->n, A->rmap->rstart, lu->val, lu->col, lu->row, SLU_NR_loc, SLU_ZD, SLU_GE));
  }

  /* Factor the matrix. */
  PetscStackCall("SuperLU_DIST:PStatInit",PStatInit(&stat)); /* Initialize the statistics variables. */
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0)
  if (lu->use3d) {
    PetscStackCall("SuperLU_DIST:pgssvx3d",pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo));
  } else
#endif
  PetscStackCall("SuperLU_DIST:pgssvx",pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo));
  if (sinfo > 0) {
    if (A->erroriffailure) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot in row %d",sinfo);
    } else {
      if (sinfo <= lu->A_sup.ncol) {
        F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
        ierr = PetscInfo(F,"U(i,i) is exactly zero, i= %d\n",sinfo);CHKERRQ(ierr);
      } else if (sinfo > lu->A_sup.ncol) {
        /*
          number of bytes allocated when memory allocation
          failure occurred, plus A->ncol.
        */
        F->factorerrortype = MAT_FACTOR_OUTMEMORY;
        ierr = PetscInfo(F,"Number of bytes allocated when memory allocation fails %d\n",sinfo);CHKERRQ(ierr);
      }
    }
  } else PetscCheck(sinfo >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"info = %d, argument in p*gssvx() had an illegal value",sinfo);

  if (lu->options.PrintStat) {
    PetscStackCall("SuperLU_DIST:PStatPrint",PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */
  }
  PetscStackCall("SuperLU_DIST:PStatFree",PStatFree(&stat));
  F->assembled     = PETSC_TRUE;
  F->preallocated  = PETSC_TRUE;
  lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this function only */
  PetscFunctionReturn(0);
}
/* Note the Petsc r and c permutations are ignored */
static PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)F->data;
  PetscInt         M = A->rmap->N,N = A->cmap->N;

  PetscFunctionBegin;
  /* Initialize ScalePermstruct and LUstruct. */
  PetscStackCall("SuperLU_DIST:ScalePermstructInit",ScalePermstructInit(M, N, &lu->ScalePermstruct));
  PetscStackCall("SuperLU_DIST:LUstructInit",LUstructInit(N, &lu->LUstruct));
  F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST;
  F->ops->solve           = MatSolve_SuperLU_DIST;
  F->ops->matsolve        = MatMatSolve_SuperLU_DIST;
  F->ops->getinertia      = NULL;

  if (A->symmetric || A->hermitian) F->ops->getinertia = MatGetInertia_SuperLU_DIST;
  lu->CleanUpSuperLU_Dist = PETSC_TRUE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatCholeskyFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,const MatFactorInfo *info)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatLUFactorSymbolic_SuperLU_DIST(F,A,r,r,info);CHKERRQ(ierr);
  F->ops->choleskyfactornumeric = MatLUFactorNumeric_SuperLU_DIST;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatFactorGetSolverType_aij_superlu_dist(Mat A,MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERSUPERLU_DIST;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatView_Info_SuperLU_DIST(Mat A,PetscViewer viewer)
{
  Mat_SuperLU_DIST       *lu = (Mat_SuperLU_DIST*)A->data;
  superlu_dist_options_t options;
  PetscErrorCode         ierr;

  PetscFunctionBegin;
  /* check if matrix is superlu_dist type */
  if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);

  options = lu->options;
  ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
  /* would love to use superlu 'IFMT' macro but it looks like it's inconsistently applied, the
   * format spec for int64_t is set to %d for whatever reason */
  ierr = PetscViewerASCIIPrintf(viewer,"  Process grid nprow %lld x npcol %lld \n",(long long)lu->nprow,(long long)lu->npcol);CHKERRQ(ierr);
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0)
  if (lu->use3d) {
    ierr = PetscViewerASCIIPrintf(viewer,"  Using 3d decomposition with npdep %lld \n",(long long)lu->npdep);CHKERRQ(ierr);
  }
#endif

  ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",PetscBools[options.Equil != NO]);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",PetscBools[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",PetscBools[options.IterRefine == SLU_DOUBLE]);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %lld col partition %lld \n",(long long)lu->nprow,(long long)lu->npcol);CHKERRQ(ierr);

  switch (options.RowPerm) {
  case NOROWPERM:
    ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation NOROWPERM\n");CHKERRQ(ierr);
    break;
  case LargeDiag_MC64:
    ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation LargeDiag_MC64\n");CHKERRQ(ierr);
    break;
  case LargeDiag_AWPM:
    ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation LargeDiag_AWPM\n");CHKERRQ(ierr);
    break;
  case MY_PERMR:
    ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation MY_PERMR\n");CHKERRQ(ierr);
    break;
  default:
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown row permutation");
  }

  switch (options.ColPerm) {
  case NATURAL:
    ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation NATURAL\n");CHKERRQ(ierr);
    break;
  case MMD_AT_PLUS_A:
    ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr);
    break;
  case MMD_ATA:
    ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_ATA\n");CHKERRQ(ierr);
    break;
  /* Even though this is called METIS, the SuperLU_DIST code sets this by default if PARMETIS is defined, not METIS */
  case METIS_AT_PLUS_A:
    ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation METIS_AT_PLUS_A\n");CHKERRQ(ierr);
    break;
  case PARMETIS:
    ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation PARMETIS\n");CHKERRQ(ierr);
    break;
  default:
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown column permutation");
  }

  ierr = PetscViewerASCIIPrintf(viewer,"  Parallel symbolic factorization %s \n",PetscBools[options.ParSymbFact != NO]);CHKERRQ(ierr);

  if (lu->FactPattern == SamePattern) {
    ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern\n");CHKERRQ(ierr);
  } else if (lu->FactPattern == SamePattern_SameRowPerm) {
    ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern_SameRowPerm\n");CHKERRQ(ierr);
  } else if (lu->FactPattern == DOFACT) {
    ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization DOFACT\n");CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown factorization pattern");
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = MatView_Info_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
{
  Mat                    B;
  Mat_SuperLU_DIST       *lu;
  PetscErrorCode         ierr;
  PetscInt               M = A->rmap->N,N = A->cmap->N,indx;
  PetscMPIInt            size;
  superlu_dist_options_t options;
  PetscBool              flg;
  const char             *colperm[]     = {"NATURAL","MMD_AT_PLUS_A","MMD_ATA","METIS_AT_PLUS_A","PARMETIS"};
  const char             *rowperm[]     = {"NOROWPERM","LargeDiag_MC64","LargeDiag_AWPM","MY_PERMR"};
  const char             *factPattern[] = {"SamePattern","SamePattern_SameRowPerm","DOFACT"};
  PetscBool              set;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,M,N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("superlu_dist",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);
  B->ops->getinfo = MatGetInfo_External;
  B->ops->view    = MatView_SuperLU_DIST;
  B->ops->destroy = MatDestroy_SuperLU_DIST;

  /* Set the default input options:
     options.Fact              = DOFACT;
     options.Equil             = YES;
     options.ParSymbFact       = NO;
     options.ColPerm           = METIS_AT_PLUS_A;
     options.RowPerm           = LargeDiag_MC64;
     options.ReplaceTinyPivot  = YES;
     options.IterRefine        = DOUBLE;
     options.Trans             = NOTRANS;
     options.SolveInitialized  = NO;  - holds the communication pattern used by MatSolve() and MatMatSolve()
     options.RefineInitialized = NO;
     options.PrintStat         = YES;
     options.SymPattern        = NO;
  */
  set_default_options_dist(&options);

  B->trivialsymbolic = PETSC_TRUE;
  if (ftype == MAT_FACTOR_LU) {
    B->factortype = MAT_FACTOR_LU;
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
  } else {
    B->factortype = MAT_FACTOR_CHOLESKY;
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SuperLU_DIST;
    options.SymPattern = YES;
  }

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERSUPERLU_DIST,&B->solvertype);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&lu);CHKERRQ(ierr);
  B->data = lu;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);

  {
    PetscMPIInt       flg;
    MPI_Comm          comm;
    PetscSuperLU_DIST *context = NULL;

    ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
    if (Petsc_Superlu_dist_keyval == MPI_KEYVAL_INVALID) {
      ierr = MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,Petsc_Superlu_dist_keyval_Delete_Fn,&Petsc_Superlu_dist_keyval,(void*)0);CHKERRMPI(ierr);
      ierr = PetscRegisterFinalize(Petsc_Superlu_dist_keyval_free);CHKERRQ(ierr);
    }
    ierr = MPI_Comm_get_attr(comm,Petsc_Superlu_dist_keyval,&context,&flg);CHKERRMPI(ierr);
    if (!flg || context->busy) {
      if (!flg) {
        ierr = PetscNew(&context);CHKERRQ(ierr);
        context->busy = PETSC_TRUE;
        ierr = MPI_Comm_dup(comm,&context->comm);CHKERRMPI(ierr);
        ierr = MPI_Comm_set_attr(comm,Petsc_Superlu_dist_keyval,context);CHKERRMPI(ierr);
      } else {
        ierr = PetscCommGetComm(PetscObjectComm((PetscObject)A),&lu->comm_superlu);CHKERRQ(ierr);
      }

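      /* An illustrative note on the search below (an explanatory addition): it picks the
         process grid closest to square, i.e. the largest nprow <= sqrt(size) that evenly
         divides size. For example, size = 6 gives nprow = 2, npcol = 3, while a prime
         size falls through to a 1 x size grid. */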
      /* Default number of process columns and rows */
      lu->nprow = (int_t) (0.5 + PetscSqrtReal((PetscReal)size));
      if (!lu->nprow) lu->nprow = 1;
      while (lu->nprow > 0) {
        lu->npcol = (int_t) (size/lu->nprow);
        if (size == lu->nprow * lu->npcol) break;
        lu->nprow--;
      }
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0)
      lu->use3d = PETSC_FALSE;
      lu->npdep = 1;
#endif
      ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0)
      ierr = PetscOptionsBool("-mat_superlu_dist_3d","Use SuperLU_DIST 3D distribution","None",lu->use3d,&lu->use3d,NULL);CHKERRQ(ierr);
      if (lu->use3d) {
        PetscInt t;
        ierr = PetscOptionsInt("-mat_superlu_dist_d","Number of z entries in processor partition","None",lu->npdep,(PetscInt*)&lu->npdep,NULL);CHKERRQ(ierr);
        t = (PetscInt) PetscLog2Real((PetscReal)lu->npdep);
        PetscCheck(PetscPowInt(2,t) == lu->npdep,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_OUTOFRANGE,"-mat_superlu_dist_d %lld must be a power of 2",(long long)lu->npdep);
        if (lu->npdep > 1) {
          lu->nprow = (int_t) (0.5 + PetscSqrtReal((PetscReal)(size/lu->npdep)));
          if (!lu->nprow) lu->nprow = 1;
          while (lu->nprow > 0) {
            lu->npcol = (int_t) (size/(lu->npdep*lu->nprow));
            if (size == lu->nprow * lu->npcol * lu->npdep) break;
            lu->nprow--;
          }
        }
      }
#endif
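      /* An illustrative note (an explanatory addition): with the 3D algorithm the z
         dimension npdep must be a power of two and the 2D defaults are recomputed for
         size/npdep ranks; e.g. 8 ranks with -mat_superlu_dist_3d -mat_superlu_dist_d 2
         give a 2 x 2 x 2 grid. */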
      ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,(PetscInt*)&lu->nprow,NULL);CHKERRQ(ierr);
      ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,(PetscInt*)&lu->npcol,NULL);CHKERRQ(ierr);
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0)
      PetscCheck(size == lu->nprow*lu->npcol*lu->npdep,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of processes %d must equal nprow %lld * npcol %lld * npdep %lld",size,(long long)lu->nprow,(long long)lu->npcol,(long long)lu->npdep);
#else
      PetscCheck(size == lu->nprow*lu->npcol,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of processes %d must equal nprow %lld * npcol %lld",size,(long long)lu->nprow,(long long)lu->npcol);
#endif
      ierr = PetscOptionsEnd();CHKERRQ(ierr);
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0)
      if (lu->use3d) {
        PetscStackCall("SuperLU_DIST:superlu_gridinit3d",superlu_gridinit3d(context ? context->comm : lu->comm_superlu, lu->nprow, lu->npcol, lu->npdep, &lu->grid3d));
        if (context) {context->grid3d = lu->grid3d; context->use3d = lu->use3d;}
      } else {
#endif
        PetscStackCall("SuperLU_DIST:superlu_gridinit",superlu_gridinit(context ? context->comm : lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid));
        if (context) context->grid = lu->grid;
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0)
      }
#endif
      ierr = PetscInfo(NULL,"Duplicating a communicator for SuperLU_DIST and calling superlu_gridinit()\n");CHKERRQ(ierr);
      if (flg) {
        ierr = PetscInfo(NULL,"Communicator attribute already in use so not saving communicator and SuperLU_DIST grid in communicator attribute\n");CHKERRQ(ierr);
      } else {
        ierr = PetscInfo(NULL,"Storing communicator and SuperLU_DIST grid in communicator attribute\n");CHKERRQ(ierr);
      }
    } else {
      ierr = PetscInfo(NULL,"Reusing communicator and superlu_gridinit() for SuperLU_DIST from communicator attribute\n");CHKERRQ(ierr);
      context->busy = PETSC_TRUE;
      lu->grid      = context->grid;
    }
  }

  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsBool("-mat_superlu_dist_equil","Equilibrate matrix","None",options.Equil ? PETSC_TRUE : PETSC_FALSE,&flg,&set);CHKERRQ(ierr);
  if (set && !flg) options.Equil = NO;

  ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",rowperm,4,rowperm[1],&indx,&flg);CHKERRQ(ierr);
  if (flg) {
    switch (indx) {
    case 0:
      options.RowPerm = NOROWPERM;
      break;
    case 1:
      options.RowPerm = LargeDiag_MC64;
      break;
    case 2:
      options.RowPerm = LargeDiag_AWPM;
      break;
    case 3:
      options.RowPerm = MY_PERMR;
      break;
    default:
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown row permutation");
    }
  }

  ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",colperm,5,colperm[3],&indx,&flg);CHKERRQ(ierr);
  if (flg) {
    switch (indx) {
    case 0:
      options.ColPerm = NATURAL;
      break;
    case 1:
      options.ColPerm = MMD_AT_PLUS_A;
      break;
    case 2:
      options.ColPerm = MMD_ATA;
      break;
    case 3:
      options.ColPerm = METIS_AT_PLUS_A;
      break;
    case 4:
      options.ColPerm = PARMETIS; /* only works for np>1 */
      break;
    default:
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown column permutation");
    }
  }

  options.ReplaceTinyPivot = NO;
  ierr = PetscOptionsBool("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",options.ReplaceTinyPivot ? PETSC_TRUE : PETSC_FALSE,&flg,&set);CHKERRQ(ierr);
  if (set && flg) options.ReplaceTinyPivot = YES;

  options.ParSymbFact = NO;
  ierr = PetscOptionsBool("-mat_superlu_dist_parsymbfact","Parallel symbolic factorization","None",PETSC_FALSE,&flg,&set);CHKERRQ(ierr);
  if (set && flg && size>1) {
#if defined(PETSC_HAVE_PARMETIS)
    options.ParSymbFact = YES;
    options.ColPerm     = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */
#else
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"parsymbfact needs PARMETIS");
#endif
  }

  lu->FactPattern = SamePattern;
  ierr = PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,3,factPattern[0],&indx,&flg);CHKERRQ(ierr);
  if (flg) {
    switch (indx) {
    case 0:
      lu->FactPattern = SamePattern;
      break;
    case 1:
      lu->FactPattern = SamePattern_SameRowPerm;
      break;
    case 2:
      lu->FactPattern = DOFACT;
      break;
    }
  }

  options.IterRefine = NOREFINE;
  ierr = PetscOptionsBool("-mat_superlu_dist_iterrefine","Use iterative refinement","None",options.IterRefine == NOREFINE ? PETSC_FALSE : PETSC_TRUE,&flg,&set);CHKERRQ(ierr);
  if (set) {
    if (flg) options.IterRefine = SLU_DOUBLE;
    else     options.IterRefine = NOREFINE;
  }

  if (PetscLogPrintInfo) options.PrintStat = YES;
  else                   options.PrintStat = NO;
  ierr = PetscOptionsBool("-mat_superlu_dist_statprint","Print factorization information","None",(PetscBool)options.PrintStat,(PetscBool*)&options.PrintStat,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  lu->options              = options;
  lu->options.Fact         = DOFACT;
  lu->matsolve_iscalled    = PETSC_FALSE;
  lu->matmatsolve_iscalled = PETSC_FALSE;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_aij_superlu_dist);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatSuperluDistGetDiagU_C",MatSuperluDistGetDiagU_SuperLU_DIST);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}

PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_SuperLU_DIST(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSolverTypeRegister(MATSOLVERSUPERLU_DIST,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_superlu_dist);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERSUPERLU_DIST,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_superlu_dist);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERSUPERLU_DIST,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_superlu_dist);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERSUPERLU_DIST,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_superlu_dist);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
  MATSOLVERSUPERLU_DIST - Parallel direct solver package for LU factorization

  Use ./configure --download-superlu_dist --download-parmetis --download-metis --download-ptscotch to have PETSc installed with SuperLU_DIST

  Use -pc_type lu -pc_factor_mat_solver_type superlu_dist to use this direct solver

  Works with AIJ matrices

  Options Database Keys:
+ -mat_superlu_dist_r <n> - number of rows in processor partition
. -mat_superlu_dist_c <n> - number of columns in processor partition
. -mat_superlu_dist_3d - use 3d partition, requires SuperLU_DIST 7.2 or later
. -mat_superlu_dist_d <n> - depth in 3d partition (valid only if -mat_superlu_dist_3d is provided)
. -mat_superlu_dist_equil - equilibrate the matrix
. -mat_superlu_dist_rowperm <NOROWPERM,LargeDiag_MC64,LargeDiag_AWPM,MY_PERMR> - row permutation
. -mat_superlu_dist_colperm <NATURAL,MMD_AT_PLUS_A,MMD_ATA,METIS_AT_PLUS_A,PARMETIS> - column permutation
. -mat_superlu_dist_replacetinypivot - replace tiny pivots
. -mat_superlu_dist_fact <SamePattern> - (choose one of) SamePattern SamePattern_SameRowPerm DOFACT
. -mat_superlu_dist_iterrefine - use iterative refinement
- -mat_superlu_dist_statprint - print factorization information

  Notes:
  If PETSc was configured with --with-cuda then this solver will automatically use the GPUs.
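
  A minimal usage sketch from code (assumes ksp is an existing KSP for the linear system and pc its PC):
.vb
    KSPGetPC(ksp,&pc);
    PCSetType(pc,PCLU);
    PCFactorSetMatSolverType(pc,MATSOLVERSUPERLU_DIST);
.ve
  This matches the command line options -pc_type lu -pc_factor_mat_solver_type superlu_dist given above.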

  Level: beginner

.seealso: PCLU, PCFactorSetMatSolverType(), MatSolverType

M*/