1 /* 2 Provides an interface to the SuperLU_DIST sparse solver 3 */ 4 5 #include <../src/mat/impls/aij/seq/aij.h> 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> 7 #include <petscpkg_version.h> 8 9 EXTERN_C_BEGIN 10 #if defined(PETSC_USE_COMPLEX) 11 #include <superlu_zdefs.h> 12 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(6,3,0) 13 #define LUstructInit zLUstructInit 14 #define ScalePermstructInit zScalePermstructInit 15 #define ScalePermstructFree zScalePermstructFree 16 #define LUstructFree zLUstructFree 17 #define Destroy_LU zDestroy_LU 18 #define ScalePermstruct_t zScalePermstruct_t 19 #define LUstruct_t zLUstruct_t 20 #define SOLVEstruct_t zSOLVEstruct_t 21 #endif 22 #else 23 #include <superlu_ddefs.h> 24 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(6,3,0) 25 #define LUstructInit dLUstructInit 26 #define ScalePermstructInit dScalePermstructInit 27 #define ScalePermstructFree dScalePermstructFree 28 #define LUstructFree dLUstructFree 29 #define Destroy_LU dDestroy_LU 30 #define ScalePermstruct_t dScalePermstruct_t 31 #define LUstruct_t dLUstruct_t 32 #define SOLVEstruct_t dSOLVEstruct_t 33 #endif 34 #endif 35 EXTERN_C_END 36 37 typedef struct { 38 int_t nprow,npcol,*row,*col; 39 gridinfo_t grid; 40 superlu_dist_options_t options; 41 SuperMatrix A_sup; 42 ScalePermstruct_t ScalePermstruct; 43 LUstruct_t LUstruct; 44 int StatPrint; 45 SOLVEstruct_t SOLVEstruct; 46 fact_t FactPattern; 47 MPI_Comm comm_superlu; 48 #if defined(PETSC_USE_COMPLEX) 49 doublecomplex *val; 50 #else 51 double *val; 52 #endif 53 PetscBool matsolve_iscalled,matmatsolve_iscalled; 54 PetscBool CleanUpSuperLU_Dist; /* Flag to clean up (non-global) SuperLU objects during Destroy */ 55 } Mat_SuperLU_DIST; 56 57 PetscErrorCode MatSuperluDistGetDiagU_SuperLU_DIST(Mat F,PetscScalar *diagU) 58 { 59 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)F->data; 60 61 PetscFunctionBegin; 62 #if defined(PETSC_USE_COMPLEX) 63 PetscStackCall("SuperLU_DIST:pzGetDiagU",pzGetDiagU(F->rmap->N,&lu->LUstruct,&lu->grid,(doublecomplex*)diagU)); 64 #else 65 PetscStackCall("SuperLU_DIST:pdGetDiagU",pdGetDiagU(F->rmap->N,&lu->LUstruct,&lu->grid,diagU)); 66 #endif 67 PetscFunctionReturn(0); 68 } 69 70 PetscErrorCode MatSuperluDistGetDiagU(Mat F,PetscScalar *diagU) 71 { 72 PetscErrorCode ierr; 73 74 PetscFunctionBegin; 75 PetscValidHeaderSpecific(F,MAT_CLASSID,1); 76 ierr = PetscTryMethod(F,"MatSuperluDistGetDiagU_C",(Mat,PetscScalar*),(F,diagU));CHKERRQ(ierr); 77 PetscFunctionReturn(0); 78 } 79 80 /* This allows reusing the Superlu_DIST communicator and grid when only a single SuperLU_DIST matrix is used at a time */ 81 typedef struct { 82 MPI_Comm comm; 83 PetscBool busy; 84 gridinfo_t grid; 85 } PetscSuperLU_DIST; 86 static PetscMPIInt Petsc_Superlu_dist_keyval = MPI_KEYVAL_INVALID; 87 88 PETSC_EXTERN PetscMPIInt MPIAPI Petsc_Superlu_dist_keyval_Delete_Fn(MPI_Comm comm,PetscMPIInt keyval,void *attr_val,void *extra_state) 89 { 90 PetscErrorCode ierr; 91 PetscSuperLU_DIST *context = (PetscSuperLU_DIST *) attr_val; 92 93 PetscFunctionBegin; 94 if (keyval != Petsc_Superlu_dist_keyval) SETERRMPI(PETSC_COMM_SELF,PETSC_ERR_ARG_CORRUPT,"Unexpected keyval"); 95 ierr = PetscInfo(NULL,"Removing Petsc_Superlu_dist_keyval attribute from communicator that is being freed\n");CHKERRQ(ierr); 96 PetscStackCall("SuperLU_DIST:superlu_gridexit",superlu_gridexit(&context->grid)); 97 ierr = MPI_Comm_free(&context->comm);CHKERRMPI(ierr); 98 ierr = PetscFree(context);CHKERRQ(ierr); 99 PetscFunctionReturn(MPI_SUCCESS); 100 } 101 102 /* 103 Performs MPI_Comm_free_keyval() on Petsc_Superlu_dist_keyval but keeps the global variable for 104 users who do not destroy all PETSc objects before PetscFinalize(). 105 106 The value Petsc_Superlu_dist_keyval is retained so that Petsc_Superlu_dist_keyval_Delete_Fn() 107 can still check that the keyval associated with the MPI communicator is correct when the MPI 108 communicator is destroyed. 109 110 This is called in PetscFinalize() 111 */ 112 static PetscErrorCode Petsc_Superlu_dist_keyval_free(void) 113 { 114 PetscErrorCode ierr; 115 PetscMPIInt Petsc_Superlu_dist_keyval_temp = Petsc_Superlu_dist_keyval; 116 117 PetscFunctionBegin; 118 ierr = PetscInfo(NULL,"Freeing Petsc_Superlu_dist_keyval\n");CHKERRQ(ierr); 119 ierr = MPI_Comm_free_keyval(&Petsc_Superlu_dist_keyval_temp);CHKERRMPI(ierr); 120 PetscFunctionReturn(0); 121 } 122 123 static PetscErrorCode MatDestroy_SuperLU_DIST(Mat A) 124 { 125 PetscErrorCode ierr; 126 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->data; 127 128 PetscFunctionBegin; 129 if (lu->CleanUpSuperLU_Dist) { 130 /* Deallocate SuperLU_DIST storage */ 131 PetscStackCall("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist",Destroy_CompRowLoc_Matrix_dist(&lu->A_sup)); 132 if (lu->options.SolveInitialized) { 133 #if defined(PETSC_USE_COMPLEX) 134 PetscStackCall("SuperLU_DIST:zSolveFinalize",zSolveFinalize(&lu->options, &lu->SOLVEstruct)); 135 #else 136 PetscStackCall("SuperLU_DIST:dSolveFinalize",dSolveFinalize(&lu->options, &lu->SOLVEstruct)); 137 #endif 138 } 139 PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct)); 140 PetscStackCall("SuperLU_DIST:ScalePermstructFree",ScalePermstructFree(&lu->ScalePermstruct)); 141 PetscStackCall("SuperLU_DIST:LUstructFree",LUstructFree(&lu->LUstruct)); 142 143 /* Release the SuperLU_DIST process grid. Only if the matrix has its own copy, this is it is not in the communicator context */ 144 if (lu->comm_superlu) { 145 PetscStackCall("SuperLU_DIST:superlu_gridexit",superlu_gridexit(&lu->grid)); 146 ierr = PetscCommRestoreComm(PetscObjectComm((PetscObject)A),&lu->comm_superlu);CHKERRQ(ierr); 147 } else { 148 PetscSuperLU_DIST *context; 149 MPI_Comm comm; 150 PetscMPIInt flg; 151 152 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 153 ierr = MPI_Comm_get_attr(comm,Petsc_Superlu_dist_keyval,&context,&flg);CHKERRMPI(ierr); 154 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Communicator does not have expected Petsc_Superlu_dist_keyval attribute"); 155 context->busy = PETSC_FALSE; 156 } 157 } 158 ierr = PetscFree(A->data);CHKERRQ(ierr); 159 /* clear composed functions */ 160 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr); 161 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSuperluDistGetDiagU_C",NULL);CHKERRQ(ierr); 162 163 PetscFunctionReturn(0); 164 } 165 166 static PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x) 167 { 168 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->data; 169 PetscErrorCode ierr; 170 PetscInt m=A->rmap->n; 171 SuperLUStat_t stat; 172 double berr[1]; 173 PetscScalar *bptr=NULL; 174 int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */ 175 static PetscBool cite = PETSC_FALSE; 176 177 PetscFunctionBegin; 178 if (lu->options.Fact != FACTORED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact must equal FACTORED"); 179 ierr = PetscCitationsRegister("@article{lidemmel03,\n author = {Xiaoye S. Li and James W. Demmel},\n title = {{SuperLU_DIST}: A Scalable Distributed-Memory Sparse Direct\n Solver for Unsymmetric Linear Systems},\n journal = {ACM Trans. Mathematical Software},\n volume = {29},\n number = {2},\n pages = {110-140},\n year = 2003\n}\n",&cite);CHKERRQ(ierr); 180 181 if (lu->options.SolveInitialized && !lu->matsolve_iscalled) { 182 /* see comments in MatMatSolve() */ 183 #if defined(PETSC_USE_COMPLEX) 184 PetscStackCall("SuperLU_DIST:zSolveFinalize",zSolveFinalize(&lu->options, &lu->SOLVEstruct)); 185 #else 186 PetscStackCall("SuperLU_DIST:dSolveFinalize",dSolveFinalize(&lu->options, &lu->SOLVEstruct)); 187 #endif 188 lu->options.SolveInitialized = NO; 189 } 190 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 191 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 192 193 PetscStackCall("SuperLU_DIST:PStatInit",PStatInit(&stat)); /* Initialize the statistics variables. */ 194 #if defined(PETSC_USE_COMPLEX) 195 PetscStackCall("SuperLU_DIST:pzgssvx",pzgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,(doublecomplex*)bptr,m,1,&lu->grid,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info)); 196 #else 197 PetscStackCall("SuperLU_DIST:pdgssvx",pdgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,bptr,m,1,&lu->grid,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info)); 198 #endif 199 if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx fails, info: %d",info); 200 201 if (lu->options.PrintStat) PetscStackCall("SuperLU_DIST:PStatPrint",PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ 202 PetscStackCall("SuperLU_DIST:PStatFree",PStatFree(&stat)); 203 204 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 205 lu->matsolve_iscalled = PETSC_TRUE; 206 lu->matmatsolve_iscalled = PETSC_FALSE; 207 PetscFunctionReturn(0); 208 } 209 210 static PetscErrorCode MatMatSolve_SuperLU_DIST(Mat A,Mat B_mpi,Mat X) 211 { 212 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->data; 213 PetscErrorCode ierr; 214 PetscInt m=A->rmap->n,nrhs; 215 SuperLUStat_t stat; 216 double berr[1]; 217 PetscScalar *bptr; 218 int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */ 219 PetscBool flg; 220 221 PetscFunctionBegin; 222 if (lu->options.Fact != FACTORED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact must equal FACTORED"); 223 ierr = PetscObjectTypeCompareAny((PetscObject)B_mpi,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 224 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix"); 225 if (X != B_mpi) { 226 ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 227 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix"); 228 } 229 230 if (lu->options.SolveInitialized && !lu->matmatsolve_iscalled) { 231 /* communication pattern of SOLVEstruct is unlikely created for matmatsolve, 232 thus destroy it and create a new SOLVEstruct. 233 Otherwise it may result in memory corruption or incorrect solution 234 See src/mat/tests/ex125.c */ 235 #if defined(PETSC_USE_COMPLEX) 236 PetscStackCall("SuperLU_DIST:zSolveFinalize",zSolveFinalize(&lu->options, &lu->SOLVEstruct)); 237 #else 238 PetscStackCall("SuperLU_DIST:dSolveFinalize",dSolveFinalize(&lu->options, &lu->SOLVEstruct)); 239 #endif 240 lu->options.SolveInitialized = NO; 241 } 242 if (X != B_mpi) { 243 ierr = MatCopy(B_mpi,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 244 } 245 246 ierr = MatGetSize(B_mpi,NULL,&nrhs);CHKERRQ(ierr); 247 248 PetscStackCall("SuperLU_DIST:PStatInit",PStatInit(&stat)); /* Initialize the statistics variables. */ 249 ierr = MatDenseGetArray(X,&bptr);CHKERRQ(ierr); 250 251 #if defined(PETSC_USE_COMPLEX) 252 PetscStackCall("SuperLU_DIST:pzgssvx",pzgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,(doublecomplex*)bptr,m,nrhs,&lu->grid, &lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info)); 253 #else 254 PetscStackCall("SuperLU_DIST:pdgssvx",pdgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,bptr,m,nrhs,&lu->grid,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info)); 255 #endif 256 257 if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx fails, info: %d",info); 258 ierr = MatDenseRestoreArray(X,&bptr);CHKERRQ(ierr); 259 260 if (lu->options.PrintStat) PetscStackCall("SuperLU_DIST:PStatPrint",PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ 261 PetscStackCall("SuperLU_DIST:PStatFree",PStatFree(&stat)); 262 lu->matsolve_iscalled = PETSC_FALSE; 263 lu->matmatsolve_iscalled = PETSC_TRUE; 264 PetscFunctionReturn(0); 265 } 266 267 /* 268 input: 269 F: numeric Cholesky factor 270 output: 271 nneg: total number of negative pivots 272 nzero: total number of zero pivots 273 npos: (global dimension of F) - nneg - nzero 274 */ 275 static PetscErrorCode MatGetInertia_SuperLU_DIST(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos) 276 { 277 PetscErrorCode ierr; 278 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)F->data; 279 PetscScalar *diagU=NULL; 280 PetscInt M,i,neg=0,zero=0,pos=0; 281 PetscReal r; 282 283 PetscFunctionBegin; 284 if (PetscUnlikely(!F->assembled)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix factor F is not assembled"); 285 if (PetscUnlikely(lu->options.RowPerm != NOROWPERM)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must set NOROWPERM"); 286 ierr = MatGetSize(F,&M,NULL);CHKERRQ(ierr); 287 ierr = PetscMalloc1(M,&diagU);CHKERRQ(ierr); 288 ierr = MatSuperluDistGetDiagU(F,diagU);CHKERRQ(ierr); 289 for (i=0; i<M; i++) { 290 #if defined(PETSC_USE_COMPLEX) 291 r = PetscImaginaryPart(diagU[i])/10.0; 292 if (PetscUnlikely(r< -PETSC_MACHINE_EPSILON || r>PETSC_MACHINE_EPSILON)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"diagU[%" PetscInt_FMT "]=%g + i %g is non-real",i,(double)PetscRealPart(diagU[i]),(double)(r*10.0)); 293 r = PetscRealPart(diagU[i]); 294 #else 295 r = diagU[i]; 296 #endif 297 if (r > 0) { 298 pos++; 299 } else if (r < 0) { 300 neg++; 301 } else zero++; 302 } 303 304 ierr = PetscFree(diagU);CHKERRQ(ierr); 305 if (nneg) *nneg = neg; 306 if (nzero) *nzero = zero; 307 if (npos) *npos = pos; 308 PetscFunctionReturn(0); 309 } 310 311 static PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F,Mat A,const MatFactorInfo *info) 312 { 313 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)F->data; 314 Mat Aloc; 315 const PetscScalar *av; 316 const PetscInt *ai=NULL,*aj=NULL; 317 PetscInt nz,dummy; 318 int sinfo; /* SuperLU_Dist info flag is always an int even with long long indices */ 319 SuperLUStat_t stat; 320 double *berr=0; 321 PetscBool ismpiaij,isseqaij,flg; 322 PetscErrorCode ierr; 323 324 PetscFunctionBegin; 325 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 326 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATMPIAIJ,&ismpiaij);CHKERRQ(ierr); 327 if (ismpiaij) { 328 ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr); 329 } else if (isseqaij) { 330 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 331 Aloc = A; 332 } else SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for type %s",((PetscObject)A)->type_name); 333 334 ierr = MatGetRowIJ(Aloc,0,PETSC_FALSE,PETSC_FALSE,&dummy,&ai,&aj,&flg);CHKERRQ(ierr); 335 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GetRowIJ failed"); 336 ierr = MatSeqAIJGetArrayRead(Aloc,&av);CHKERRQ(ierr); 337 nz = ai[Aloc->rmap->n]; 338 339 /* Allocations for A_sup */ 340 if (lu->options.Fact == DOFACT) { /* first numeric factorization */ 341 #if defined(PETSC_USE_COMPLEX) 342 PetscStackCall("SuperLU_DIST:zallocateA_dist",zallocateA_dist(Aloc->rmap->n, nz, &lu->val, &lu->col, &lu->row)); 343 #else 344 PetscStackCall("SuperLU_DIST:dallocateA_dist",dallocateA_dist(Aloc->rmap->n, nz, &lu->val, &lu->col, &lu->row)); 345 #endif 346 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 347 if (lu->FactPattern == SamePattern_SameRowPerm) { 348 lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */ 349 } else if (lu->FactPattern == SamePattern) { 350 PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct)); /* Deallocate L and U matrices. */ 351 lu->options.Fact = SamePattern; 352 } else if (lu->FactPattern == DOFACT) { 353 PetscStackCall("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist",Destroy_CompRowLoc_Matrix_dist(&lu->A_sup)); 354 PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct)); 355 lu->options.Fact = DOFACT; 356 357 #if defined(PETSC_USE_COMPLEX) 358 PetscStackCall("SuperLU_DIST:zallocateA_dist",zallocateA_dist(Aloc->rmap->n, nz, &lu->val, &lu->col, &lu->row)); 359 #else 360 PetscStackCall("SuperLU_DIST:dallocateA_dist",dallocateA_dist(Aloc->rmap->n, nz, &lu->val, &lu->col, &lu->row)); 361 #endif 362 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"options.Fact must be one of SamePattern SamePattern_SameRowPerm DOFACT"); 363 } 364 365 /* Copy AIJ matrix to superlu_dist matrix */ 366 ierr = PetscArraycpy(lu->row,ai,Aloc->rmap->n+1);CHKERRQ(ierr); 367 ierr = PetscArraycpy(lu->col,aj,nz);CHKERRQ(ierr); 368 ierr = PetscArraycpy(lu->val,av,nz);CHKERRQ(ierr); 369 ierr = MatRestoreRowIJ(Aloc,0,PETSC_FALSE,PETSC_FALSE,&dummy,&ai,&aj,&flg);CHKERRQ(ierr); 370 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"RestoreRowIJ failed"); 371 ierr = MatSeqAIJRestoreArrayRead(Aloc,&av);CHKERRQ(ierr); 372 ierr = MatDestroy(&Aloc);CHKERRQ(ierr); 373 374 /* Create and setup A_sup */ 375 if (lu->options.Fact == DOFACT) { 376 #if defined(PETSC_USE_COMPLEX) 377 PetscStackCall("SuperLU_DIST:zCreate_CompRowLoc_Matrix_dist",zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, A->rmap->N, A->cmap->N, nz, A->rmap->n, A->rmap->rstart, lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE)); 378 #else 379 PetscStackCall("SuperLU_DIST:dCreate_CompRowLoc_Matrix_dist",dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, A->rmap->N, A->cmap->N, nz, A->rmap->n, A->rmap->rstart, lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE)); 380 #endif 381 } 382 383 /* Factor the matrix. */ 384 PetscStackCall("SuperLU_DIST:PStatInit",PStatInit(&stat)); /* Initialize the statistics variables. */ 385 #if defined(PETSC_USE_COMPLEX) 386 PetscStackCall("SuperLU_DIST:pzgssvx",pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo)); 387 #else 388 PetscStackCall("SuperLU_DIST:pdgssvx",pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo)); 389 #endif 390 391 if (sinfo > 0) { 392 if (PetscUnlikely(A->erroriffailure)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot in row %d",sinfo); 393 else { 394 if (sinfo <= lu->A_sup.ncol) { 395 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 396 ierr = PetscInfo1(F,"U(i,i) is exactly zero, i= %d\n",sinfo);CHKERRQ(ierr); 397 } else if (sinfo > lu->A_sup.ncol) { 398 /* 399 number of bytes allocated when memory allocation 400 failure occurred, plus A->ncol. 401 */ 402 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 403 ierr = PetscInfo1(F,"Number of bytes allocated when memory allocation fails %d\n",sinfo);CHKERRQ(ierr); 404 } 405 } 406 } else if (PetscUnlikely(sinfo < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %d, argument in p*gssvx() had an illegal value", sinfo); 407 408 if (lu->options.PrintStat) { 409 PetscStackCall("SuperLU_DIST:PStatPrint",PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ 410 } 411 PetscStackCall("SuperLU_DIST:PStatFree",PStatFree(&stat)); 412 F->assembled = PETSC_TRUE; 413 F->preallocated = PETSC_TRUE; 414 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */ 415 PetscFunctionReturn(0); 416 } 417 418 /* Note the Petsc r and c permutations are ignored */ 419 static PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 420 { 421 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)F->data; 422 PetscInt M = A->rmap->N,N=A->cmap->N; 423 424 PetscFunctionBegin; 425 /* Initialize ScalePermstruct and LUstruct. */ 426 PetscStackCall("SuperLU_DIST:ScalePermstructInit",ScalePermstructInit(M, N, &lu->ScalePermstruct)); 427 PetscStackCall("SuperLU_DIST:LUstructInit",LUstructInit(N, &lu->LUstruct)); 428 F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 429 F->ops->solve = MatSolve_SuperLU_DIST; 430 F->ops->matsolve = MatMatSolve_SuperLU_DIST; 431 F->ops->getinertia = NULL; 432 433 if (A->symmetric || A->hermitian) F->ops->getinertia = MatGetInertia_SuperLU_DIST; 434 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 435 PetscFunctionReturn(0); 436 } 437 438 static PetscErrorCode MatCholeskyFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,const MatFactorInfo *info) 439 { 440 PetscErrorCode ierr; 441 442 PetscFunctionBegin; 443 ierr = MatLUFactorSymbolic_SuperLU_DIST(F,A,r,r,info);CHKERRQ(ierr); 444 F->ops->choleskyfactornumeric = MatLUFactorNumeric_SuperLU_DIST; 445 PetscFunctionReturn(0); 446 } 447 448 static PetscErrorCode MatFactorGetSolverType_aij_superlu_dist(Mat A,MatSolverType *type) 449 { 450 PetscFunctionBegin; 451 *type = MATSOLVERSUPERLU_DIST; 452 PetscFunctionReturn(0); 453 } 454 455 static PetscErrorCode MatView_Info_SuperLU_DIST(Mat A,PetscViewer viewer) 456 { 457 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->data; 458 superlu_dist_options_t options; 459 PetscErrorCode ierr; 460 461 PetscFunctionBegin; 462 /* check if matrix is superlu_dist type */ 463 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0); 464 465 options = lu->options; 466 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr); 467 /* would love to use superlu 'IFMT' macro but it looks like it's inconsistently applied, the 468 * format spec for int64_t is set to %d for whatever reason */ 469 ierr = PetscViewerASCIIPrintf(viewer," Process grid nprow %lld x npcol %lld \n",(long long)lu->nprow,(long long)lu->npcol);CHKERRQ(ierr); 470 ierr = PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",PetscBools[options.Equil != NO]);CHKERRQ(ierr); 471 ierr = PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",PetscBools[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr); 472 ierr = PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",PetscBools[options.IterRefine == SLU_DOUBLE]);CHKERRQ(ierr); 473 ierr = PetscViewerASCIIPrintf(viewer," Processors in row %lld col partition %lld \n",(long long)lu->nprow,(long long)lu->npcol);CHKERRQ(ierr); 474 475 switch (options.RowPerm) { 476 case NOROWPERM: 477 ierr = PetscViewerASCIIPrintf(viewer," Row permutation NOROWPERM\n");CHKERRQ(ierr); 478 break; 479 case LargeDiag_MC64: 480 ierr = PetscViewerASCIIPrintf(viewer," Row permutation LargeDiag_MC64\n");CHKERRQ(ierr); 481 break; 482 case LargeDiag_AWPM: 483 ierr = PetscViewerASCIIPrintf(viewer," Row permutation LargeDiag_AWPM\n");CHKERRQ(ierr); 484 break; 485 case MY_PERMR: 486 ierr = PetscViewerASCIIPrintf(viewer," Row permutation MY_PERMR\n");CHKERRQ(ierr); 487 break; 488 default: 489 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 490 } 491 492 switch (options.ColPerm) { 493 case NATURAL: 494 ierr = PetscViewerASCIIPrintf(viewer," Column permutation NATURAL\n");CHKERRQ(ierr); 495 break; 496 case MMD_AT_PLUS_A: 497 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr); 498 break; 499 case MMD_ATA: 500 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_ATA\n");CHKERRQ(ierr); 501 break; 502 /* Even though this is called METIS, the SuperLU_DIST code sets this by default if PARMETIS is defined, not METIS */ 503 case METIS_AT_PLUS_A: 504 ierr = PetscViewerASCIIPrintf(viewer," Column permutation METIS_AT_PLUS_A\n");CHKERRQ(ierr); 505 break; 506 case PARMETIS: 507 ierr = PetscViewerASCIIPrintf(viewer," Column permutation PARMETIS\n");CHKERRQ(ierr); 508 break; 509 default: 510 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 511 } 512 513 ierr = PetscViewerASCIIPrintf(viewer," Parallel symbolic factorization %s \n",PetscBools[options.ParSymbFact != NO]);CHKERRQ(ierr); 514 515 if (lu->FactPattern == SamePattern) { 516 ierr = PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern\n");CHKERRQ(ierr); 517 } else if (lu->FactPattern == SamePattern_SameRowPerm) { 518 ierr = PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern_SameRowPerm\n");CHKERRQ(ierr); 519 } else if (lu->FactPattern == DOFACT) { 520 ierr = PetscViewerASCIIPrintf(viewer," Repeated factorization DOFACT\n");CHKERRQ(ierr); 521 } else { 522 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown factorization pattern"); 523 } 524 PetscFunctionReturn(0); 525 } 526 527 static PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer) 528 { 529 PetscErrorCode ierr; 530 PetscBool iascii; 531 PetscViewerFormat format; 532 533 PetscFunctionBegin; 534 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 535 if (iascii) { 536 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 537 if (format == PETSC_VIEWER_ASCII_INFO) { 538 ierr = MatView_Info_SuperLU_DIST(A,viewer);CHKERRQ(ierr); 539 } 540 } 541 PetscFunctionReturn(0); 542 } 543 544 static PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A,MatFactorType ftype,Mat *F) 545 { 546 Mat B; 547 Mat_SuperLU_DIST *lu; 548 PetscErrorCode ierr; 549 PetscInt M=A->rmap->N,N=A->cmap->N,indx; 550 PetscMPIInt size; 551 superlu_dist_options_t options; 552 PetscBool flg; 553 const char *colperm[] = {"NATURAL","MMD_AT_PLUS_A","MMD_ATA","METIS_AT_PLUS_A","PARMETIS"}; 554 const char *rowperm[] = {"NOROWPERM","LargeDiag_MC64","LargeDiag_AWPM","MY_PERMR"}; 555 const char *factPattern[] = {"SamePattern","SamePattern_SameRowPerm","DOFACT"}; 556 PetscBool set; 557 558 PetscFunctionBegin; 559 /* Create the factorization matrix */ 560 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 561 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,M,N);CHKERRQ(ierr); 562 ierr = PetscStrallocpy("superlu_dist",&((PetscObject)B)->type_name);CHKERRQ(ierr); 563 ierr = MatSetUp(B);CHKERRQ(ierr); 564 B->ops->getinfo = MatGetInfo_External; 565 B->ops->view = MatView_SuperLU_DIST; 566 B->ops->destroy = MatDestroy_SuperLU_DIST; 567 568 /* Set the default input options: 569 options.Fact = DOFACT; 570 options.Equil = YES; 571 options.ParSymbFact = NO; 572 options.ColPerm = METIS_AT_PLUS_A; 573 options.RowPerm = LargeDiag_MC64; 574 options.ReplaceTinyPivot = YES; 575 options.IterRefine = DOUBLE; 576 options.Trans = NOTRANS; 577 options.SolveInitialized = NO; -hold the communication pattern used MatSolve() and MatMatSolve() 578 options.RefineInitialized = NO; 579 options.PrintStat = YES; 580 options.SymPattern = NO; 581 */ 582 set_default_options_dist(&options); 583 584 B->trivialsymbolic = PETSC_TRUE; 585 if (ftype == MAT_FACTOR_LU) { 586 B->factortype = MAT_FACTOR_LU; 587 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 588 } else { 589 B->factortype = MAT_FACTOR_CHOLESKY; 590 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SuperLU_DIST; 591 options.SymPattern = YES; 592 } 593 594 /* set solvertype */ 595 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 596 ierr = PetscStrallocpy(MATSOLVERSUPERLU_DIST,&B->solvertype);CHKERRQ(ierr); 597 598 ierr = PetscNewLog(B,&lu);CHKERRQ(ierr); 599 B->data = lu; 600 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 601 602 { 603 PetscMPIInt flg; 604 MPI_Comm comm; 605 PetscSuperLU_DIST *context = NULL; 606 607 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 608 if (Petsc_Superlu_dist_keyval == MPI_KEYVAL_INVALID) { 609 ierr = MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,Petsc_Superlu_dist_keyval_Delete_Fn,&Petsc_Superlu_dist_keyval,(void*)0);CHKERRMPI(ierr); 610 ierr = PetscRegisterFinalize(Petsc_Superlu_dist_keyval_free);CHKERRQ(ierr); 611 } 612 ierr = MPI_Comm_get_attr(comm,Petsc_Superlu_dist_keyval,&context,&flg);CHKERRMPI(ierr); 613 if (!flg || context->busy) { 614 if (!flg) { 615 ierr = PetscNew(&context);CHKERRQ(ierr); 616 context->busy = PETSC_TRUE; 617 ierr = MPI_Comm_dup(comm,&context->comm);CHKERRMPI(ierr); 618 ierr = MPI_Comm_set_attr(comm,Petsc_Superlu_dist_keyval,context);CHKERRMPI(ierr); 619 } else { 620 ierr = PetscCommGetComm(PetscObjectComm((PetscObject)A),&lu->comm_superlu);CHKERRQ(ierr); 621 } 622 623 /* Default number of process columns and rows */ 624 lu->nprow = (int_t) (0.5 + PetscSqrtReal((PetscReal)size)); 625 if (!lu->nprow) lu->nprow = 1; 626 while (lu->nprow > 0) { 627 lu->npcol = (int_t) (size/lu->nprow); 628 if (size == lu->nprow * lu->npcol) break; 629 lu->nprow--; 630 } 631 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr); 632 ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,(PetscInt*)&lu->nprow,NULL);CHKERRQ(ierr); 633 ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,(PetscInt*)&lu->npcol,NULL);CHKERRQ(ierr); 634 ierr = PetscOptionsEnd();CHKERRQ(ierr); 635 if (PetscUnlikely(size != lu->nprow * lu->npcol)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %lld * npcol %lld",size,(long long)lu->nprow,(long long)lu->npcol); 636 PetscStackCall("SuperLU_DIST:superlu_gridinit",superlu_gridinit(context ? context->comm : lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid)); 637 if (context) context->grid = lu->grid; 638 ierr = PetscInfo(NULL,"Duplicating a communicator for SuperLU_DIST and calling superlu_gridinit()\n");CHKERRQ(ierr); 639 if (flg) { 640 ierr = PetscInfo(NULL,"Communicator attribute already in use so not saving communicator and SuperLU_DIST grid in communicator attribute \n");CHKERRQ(ierr); 641 } else { 642 ierr = PetscInfo(NULL,"Storing communicator and SuperLU_DIST grid in communicator attribute\n");CHKERRQ(ierr); 643 } 644 } else { 645 ierr = PetscInfo(NULL,"Reusing communicator and superlu_gridinit() for SuperLU_DIST from communicator attribute.");CHKERRQ(ierr); 646 context->busy = PETSC_TRUE; 647 lu->grid = context->grid; 648 } 649 } 650 651 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr); 652 ierr = PetscOptionsBool("-mat_superlu_dist_equil","Equilibrate matrix","None",options.Equil ? PETSC_TRUE : PETSC_FALSE,&flg,&set);CHKERRQ(ierr); 653 if (set && !flg) options.Equil = NO; 654 655 ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",rowperm,4,rowperm[1],&indx,&flg);CHKERRQ(ierr); 656 if (flg) { 657 switch (indx) { 658 case 0: 659 options.RowPerm = NOROWPERM; 660 break; 661 case 1: 662 options.RowPerm = LargeDiag_MC64; 663 break; 664 case 2: 665 options.RowPerm = LargeDiag_AWPM; 666 break; 667 case 3: 668 options.RowPerm = MY_PERMR; 669 break; 670 default: 671 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown row permutation"); 672 } 673 } 674 675 ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",colperm,5,colperm[3],&indx,&flg);CHKERRQ(ierr); 676 if (flg) { 677 switch (indx) { 678 case 0: 679 options.ColPerm = NATURAL; 680 break; 681 case 1: 682 options.ColPerm = MMD_AT_PLUS_A; 683 break; 684 case 2: 685 options.ColPerm = MMD_ATA; 686 break; 687 case 3: 688 options.ColPerm = METIS_AT_PLUS_A; 689 break; 690 case 4: 691 options.ColPerm = PARMETIS; /* only works for np>1 */ 692 break; 693 default: 694 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 695 } 696 } 697 698 options.ReplaceTinyPivot = NO; 699 ierr = PetscOptionsBool("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",options.ReplaceTinyPivot ? PETSC_TRUE : PETSC_FALSE,&flg,&set);CHKERRQ(ierr); 700 if (set && flg) options.ReplaceTinyPivot = YES; 701 702 options.ParSymbFact = NO; 703 ierr = PetscOptionsBool("-mat_superlu_dist_parsymbfact","Parallel symbolic factorization","None",PETSC_FALSE,&flg,&set);CHKERRQ(ierr); 704 if (set && flg && size>1) { 705 #if defined(PETSC_HAVE_PARMETIS) 706 options.ParSymbFact = YES; 707 options.ColPerm = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */ 708 #else 709 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"parsymbfact needs PARMETIS"); 710 #endif 711 } 712 713 lu->FactPattern = SamePattern; 714 ierr = PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,3,factPattern[0],&indx,&flg);CHKERRQ(ierr); 715 if (flg) { 716 switch (indx) { 717 case 0: 718 lu->FactPattern = SamePattern; 719 break; 720 case 1: 721 lu->FactPattern = SamePattern_SameRowPerm; 722 break; 723 case 2: 724 lu->FactPattern = DOFACT; 725 break; 726 } 727 } 728 729 options.IterRefine = NOREFINE; 730 ierr = PetscOptionsBool("-mat_superlu_dist_iterrefine","Use iterative refinement","None",options.IterRefine == NOREFINE ? PETSC_FALSE : PETSC_TRUE ,&flg,&set);CHKERRQ(ierr); 731 if (set) { 732 if (flg) options.IterRefine = SLU_DOUBLE; 733 else options.IterRefine = NOREFINE; 734 } 735 736 if (PetscLogPrintInfo) options.PrintStat = YES; 737 else options.PrintStat = NO; 738 ierr = PetscOptionsBool("-mat_superlu_dist_statprint","Print factorization information","None",(PetscBool)options.PrintStat,(PetscBool*)&options.PrintStat,NULL);CHKERRQ(ierr); 739 ierr = PetscOptionsEnd();CHKERRQ(ierr); 740 741 lu->options = options; 742 lu->options.Fact = DOFACT; 743 lu->matsolve_iscalled = PETSC_FALSE; 744 lu->matmatsolve_iscalled = PETSC_FALSE; 745 746 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_aij_superlu_dist);CHKERRQ(ierr); 747 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSuperluDistGetDiagU_C",MatSuperluDistGetDiagU_SuperLU_DIST);CHKERRQ(ierr); 748 749 *F = B; 750 PetscFunctionReturn(0); 751 } 752 753 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_SuperLU_DIST(void) 754 { 755 PetscErrorCode ierr; 756 PetscFunctionBegin; 757 ierr = MatSolverTypeRegister(MATSOLVERSUPERLU_DIST,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_superlu_dist);CHKERRQ(ierr); 758 ierr = MatSolverTypeRegister(MATSOLVERSUPERLU_DIST,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_superlu_dist);CHKERRQ(ierr); 759 ierr = MatSolverTypeRegister(MATSOLVERSUPERLU_DIST,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_superlu_dist);CHKERRQ(ierr); 760 ierr = MatSolverTypeRegister(MATSOLVERSUPERLU_DIST,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_superlu_dist);CHKERRQ(ierr); 761 PetscFunctionReturn(0); 762 } 763 764 /*MC 765 MATSOLVERSUPERLU_DIST - Parallel direct solver package for LU factorization 766 767 Use ./configure --download-superlu_dist --download-parmetis --download-metis --download-ptscotch to have PETSc installed with SuperLU_DIST 768 769 Use -pc_type lu -pc_factor_mat_solver_type superlu_dist to use this direct solver 770 771 Works with AIJ matrices 772 773 Options Database Keys: 774 + -mat_superlu_dist_r <n> - number of rows in processor partition 775 . -mat_superlu_dist_c <n> - number of columns in processor partition 776 . -mat_superlu_dist_equil - equilibrate the matrix 777 . -mat_superlu_dist_rowperm <NOROWPERM,LargeDiag_MC64,LargeDiag_AWPM,MY_PERMR> - row permutation 778 . -mat_superlu_dist_colperm <NATURAL,MMD_AT_PLUS_A,MMD_ATA,METIS_AT_PLUS_A,PARMETIS> - column permutation 779 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 780 . -mat_superlu_dist_fact <SamePattern> - (choose one of) SamePattern SamePattern_SameRowPerm DOFACT 781 . -mat_superlu_dist_iterrefine - use iterative refinement 782 - -mat_superlu_dist_statprint - print factorization information 783 784 Level: beginner 785 786 .seealso: PCLU 787 788 .seealso: PCFactorSetMatSolverType(), MatSolverType 789 790 M*/ 791