1 #define PETSCMAT_DLL 2 3 /* 4 Provides an interface to the MUMPS sparse solver 5 */ 6 #include "src/mat/impls/aij/seq/aij.h" 7 #include "src/mat/impls/aij/mpi/mpiaij.h" 8 #include "src/mat/impls/sbaij/seq/sbaij.h" 9 #include "src/mat/impls/sbaij/mpi/mpisbaij.h" 10 11 EXTERN_C_BEGIN 12 #if defined(PETSC_USE_COMPLEX) 13 #include "zmumps_c.h" 14 #else 15 #include "dmumps_c.h" 16 #endif 17 EXTERN_C_END 18 #define JOB_INIT -1 19 #define JOB_END -2 20 /* macros s.t. indices match MUMPS documentation */ 21 #define ICNTL(I) icntl[(I)-1] 22 #define CNTL(I) cntl[(I)-1] 23 #define INFOG(I) infog[(I)-1] 24 #define INFO(I) info[(I)-1] 25 #define RINFOG(I) rinfog[(I)-1] 26 #define RINFO(I) rinfo[(I)-1] 27 28 typedef struct { 29 #if defined(PETSC_USE_COMPLEX) 30 ZMUMPS_STRUC_C id; 31 #else 32 DMUMPS_STRUC_C id; 33 #endif 34 MatStructure matstruc; 35 PetscMPIInt myid,size; 36 PetscInt *irn,*jcn,sym,nSolve; 37 PetscScalar *val; 38 MPI_Comm comm_mumps; 39 VecScatter scat_rhs, scat_sol; 40 PetscTruth isAIJ,CleanUpMUMPS; 41 Vec b_seq,x_seq; 42 PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*); 43 PetscErrorCode (*MatView)(Mat,PetscViewer); 44 PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType); 45 PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*); 46 PetscErrorCode (*MatCholeskyFactorSymbolic)(Mat,IS,MatFactorInfo*,Mat*); 47 PetscErrorCode (*MatDestroy)(Mat); 48 PetscErrorCode (*specialdestroy)(Mat); 49 PetscErrorCode (*MatPreallocate)(Mat,int,int,int*,int,int*); 50 } Mat_MUMPS; 51 52 EXTERN PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*); 53 EXTERN_C_BEGIN 54 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SBAIJ_SBAIJMUMPS(Mat,MatType,MatReuse,Mat*); 55 EXTERN_C_END 56 /* convert Petsc mpiaij matrix to triples: row[nz], col[nz], val[nz] */ 57 /* 58 input: 59 A - matrix in mpiaij or mpisbaij (bs=1) format 60 shift - 0: C style output triple; 1: Fortran style output triple. 
61 valOnly - FALSE: spaces are allocated and values are set for the triple 62 TRUE: only the values in v array are updated 63 output: 64 nnz - dim of r, c, and v (number of local nonzero entries of A) 65 r, c, v - row and col index, matrix values (matrix triples) 66 */ 67 PetscErrorCode MatConvertToTriples(Mat A,int shift,PetscTruth valOnly,int *nnz,int **r, int **c, PetscScalar **v) { 68 PetscInt *ai, *aj, *bi, *bj, rstart,nz, *garray; 69 PetscErrorCode ierr; 70 PetscInt i,j,jj,jB,irow,m=A->rmap.n,*ajj,*bjj,countA,countB,colA_start,jcol; 71 PetscInt *row,*col; 72 PetscScalar *av, *bv,*val; 73 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 74 75 PetscFunctionBegin; 76 if (mumps->isAIJ){ 77 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 78 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)(mat->A)->data; 79 Mat_SeqAIJ *bb=(Mat_SeqAIJ*)(mat->B)->data; 80 nz = aa->nz + bb->nz; 81 ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap.rstart; 82 garray = mat->garray; 83 av=aa->a; bv=bb->a; 84 85 } else { 86 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data; 87 Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)(mat->A)->data; 88 Mat_SeqBAIJ *bb=(Mat_SeqBAIJ*)(mat->B)->data; 89 if (A->rmap.bs > 1) SETERRQ1(PETSC_ERR_SUP," bs=%d is not supported yet\n", A->rmap.bs); 90 nz = aa->nz + bb->nz; 91 ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap.rstart; 92 garray = mat->garray; 93 av=aa->a; bv=bb->a; 94 } 95 96 if (!valOnly){ 97 ierr = PetscMalloc(nz*sizeof(PetscInt) ,&row);CHKERRQ(ierr); 98 ierr = PetscMalloc(nz*sizeof(PetscInt),&col);CHKERRQ(ierr); 99 ierr = PetscMalloc(nz*sizeof(PetscScalar),&val);CHKERRQ(ierr); 100 *r = row; *c = col; *v = val; 101 } else { 102 row = *r; col = *c; val = *v; 103 } 104 *nnz = nz; 105 106 jj = 0; irow = rstart; 107 for ( i=0; i<m; i++ ) { 108 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 109 countA = ai[i+1] - ai[i]; 110 countB = bi[i+1] - bi[i]; 111 bjj = bj + bi[i]; 112 113 /* get jB, the starting local col index for the 2nd B-part */ 114 colA_start = rstart + ajj[0]; /* the 
smallest col index for A */ 115 j=-1; 116 do { 117 j++; 118 if (j == countB) break; 119 jcol = garray[bjj[j]]; 120 } while (jcol < colA_start); 121 jB = j; 122 123 /* B-part, smaller col index */ 124 colA_start = rstart + ajj[0]; /* the smallest col index for A */ 125 for (j=0; j<jB; j++){ 126 jcol = garray[bjj[j]]; 127 if (!valOnly){ 128 row[jj] = irow + shift; col[jj] = jcol + shift; 129 130 } 131 val[jj++] = *bv++; 132 } 133 /* A-part */ 134 for (j=0; j<countA; j++){ 135 if (!valOnly){ 136 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 137 } 138 val[jj++] = *av++; 139 } 140 /* B-part, larger col index */ 141 for (j=jB; j<countB; j++){ 142 if (!valOnly){ 143 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 144 } 145 val[jj++] = *bv++; 146 } 147 irow++; 148 } 149 150 PetscFunctionReturn(0); 151 } 152 153 EXTERN_C_BEGIN 154 #undef __FUNCT__ 155 #define __FUNCT__ "MatConvert_MUMPS_Base" 156 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MUMPS_Base(Mat A,MatType type,MatReuse reuse,Mat *newmat) 157 { 158 PetscErrorCode ierr; 159 Mat B=*newmat; 160 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 161 void (*f)(void); 162 163 PetscFunctionBegin; 164 if (reuse == MAT_INITIAL_MATRIX) { 165 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 166 } 167 B->ops->duplicate = mumps->MatDuplicate; 168 B->ops->view = mumps->MatView; 169 B->ops->assemblyend = mumps->MatAssemblyEnd; 170 B->ops->lufactorsymbolic = mumps->MatLUFactorSymbolic; 171 B->ops->choleskyfactorsymbolic = mumps->MatCholeskyFactorSymbolic; 172 B->ops->destroy = mumps->MatDestroy; 173 174 /* put back original composed preallocation function */ 175 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(PetscVoidStarFunction)&f);CHKERRQ(ierr); 176 if (f) { 177 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C","",(PetscVoidFunction)mumps->MatPreallocate);CHKERRQ(ierr); 178 } 179 180 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_aijmumps_C","",PETSC_NULL);CHKERRQ(ierr); 181 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 182 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_aijmumps_C","",PETSC_NULL);CHKERRQ(ierr); 183 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr); 184 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C","",PETSC_NULL);CHKERRQ(ierr); 185 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 186 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C","",PETSC_NULL);CHKERRQ(ierr); 187 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 188 189 ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr); 190 *newmat = B; 191 PetscFunctionReturn(0); 192 } 193 EXTERN_C_END 194 195 #undef __FUNCT__ 196 #define __FUNCT__ "MatDestroy_MUMPS" 197 PetscErrorCode MatDestroy_MUMPS(Mat A) 198 { 199 Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr; 200 PetscErrorCode ierr; 201 PetscMPIInt size=lu->size; 202 PetscErrorCode (*specialdestroy)(Mat); 203 PetscFunctionBegin; 204 if (lu->CleanUpMUMPS) { 205 /* Terminate instance, deallocate memories */ 206 if (size > 1){ 207 ierr = PetscFree(lu->id.sol_loc);CHKERRQ(ierr); 208 ierr = VecScatterDestroy(lu->scat_rhs);CHKERRQ(ierr); 209 ierr = VecDestroy(lu->b_seq);CHKERRQ(ierr); 210 ierr = VecScatterDestroy(lu->scat_sol);CHKERRQ(ierr); 211 ierr = VecDestroy(lu->x_seq);CHKERRQ(ierr); 212 ierr = PetscFree(lu->val);CHKERRQ(ierr); 213 } 214 lu->id.job=JOB_END; 215 #if defined(PETSC_USE_COMPLEX) 216 zmumps_c(&lu->id); 217 #else 218 dmumps_c(&lu->id); 219 #endif 220 ierr = PetscFree(lu->irn);CHKERRQ(ierr); 221 ierr = PetscFree(lu->jcn);CHKERRQ(ierr); 222 ierr = 
MPI_Comm_free(&(lu->comm_mumps));CHKERRQ(ierr); 223 } 224 specialdestroy = lu->specialdestroy; 225 ierr = (*specialdestroy)(A);CHKERRQ(ierr); 226 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 227 PetscFunctionReturn(0); 228 } 229 230 #undef __FUNCT__ 231 #define __FUNCT__ "MatDestroy_AIJMUMPS" 232 PetscErrorCode MatDestroy_AIJMUMPS(Mat A) 233 { 234 PetscErrorCode ierr; 235 PetscMPIInt size; 236 237 PetscFunctionBegin; 238 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 239 if (size==1) { 240 ierr = MatConvert_MUMPS_Base(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 241 } else { 242 ierr = MatConvert_MUMPS_Base(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 243 } 244 PetscFunctionReturn(0); 245 } 246 247 #undef __FUNCT__ 248 #define __FUNCT__ "MatDestroy_SBAIJMUMPS" 249 PetscErrorCode MatDestroy_SBAIJMUMPS(Mat A) 250 { 251 PetscErrorCode ierr; 252 PetscMPIInt size; 253 254 PetscFunctionBegin; 255 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 256 if (size==1) { 257 ierr = MatConvert_MUMPS_Base(A,MATSEQSBAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 258 } else { 259 ierr = MatConvert_MUMPS_Base(A,MATMPISBAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 260 } 261 PetscFunctionReturn(0); 262 } 263 264 #undef __FUNCT__ 265 #define __FUNCT__ "MatSolve_MUMPS" 266 PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x) { 267 Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr; 268 PetscScalar *array; 269 Vec x_seq; 270 IS is_iden,is_petsc; 271 VecScatter scat_rhs=lu->scat_rhs,scat_sol=lu->scat_sol; 272 PetscErrorCode ierr; 273 PetscInt i; 274 275 PetscFunctionBegin; 276 lu->id.nrhs = 1; 277 x_seq = lu->b_seq; 278 if (lu->size > 1){ 279 /* MUMPS only supports centralized rhs. 
Scatter b into a seqential rhs vector */ 280 ierr = VecScatterBegin(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat_rhs);CHKERRQ(ierr); 281 ierr = VecScatterEnd(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat_rhs);CHKERRQ(ierr); 282 if (!lu->myid) {ierr = VecGetArray(x_seq,&array);CHKERRQ(ierr);} 283 } else { /* size == 1 */ 284 ierr = VecCopy(b,x);CHKERRQ(ierr); 285 ierr = VecGetArray(x,&array);CHKERRQ(ierr); 286 } 287 if (!lu->myid) { /* define rhs on the host */ 288 #if defined(PETSC_USE_COMPLEX) 289 lu->id.rhs = (mumps_double_complex*)array; 290 #else 291 lu->id.rhs = array; 292 #endif 293 } 294 if (lu->size == 1){ 295 ierr = VecRestoreArray(x,&array);CHKERRQ(ierr); 296 } else if (!lu->myid){ 297 ierr = VecRestoreArray(x_seq,&array);CHKERRQ(ierr); 298 } 299 300 if (lu->size > 1){ 301 /* distributed solution */ 302 lu->id.ICNTL(21) = 1; 303 if (!lu->nSolve){ 304 /* Create x_seq=sol_loc for repeated use */ 305 PetscInt lsol_loc; 306 PetscScalar *sol_loc; 307 lsol_loc = lu->id.INFO(23); /* length of sol_loc */ 308 ierr = PetscMalloc((1+lsol_loc)*(sizeof(PetscScalar)+sizeof(PetscInt)),&sol_loc);CHKERRQ(ierr); 309 lu->id.isol_loc = (PetscInt *)(sol_loc + lsol_loc); 310 lu->id.lsol_loc = lsol_loc; 311 #if defined(PETSC_USE_COMPLEX) 312 lu->id.sol_loc = (ZMUMPS_DOUBLE *)sol_loc; 313 #else 314 lu->id.sol_loc = (DMUMPS_DOUBLE *)sol_loc; 315 #endif 316 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,lsol_loc,sol_loc,&lu->x_seq);CHKERRQ(ierr); 317 } 318 } 319 320 /* solve phase */ 321 /*-------------*/ 322 lu->id.job = 3; 323 #if defined(PETSC_USE_COMPLEX) 324 zmumps_c(&lu->id); 325 #else 326 dmumps_c(&lu->id); 327 #endif 328 if (lu->id.INFOG(1) < 0) { 329 SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",lu->id.INFOG(1)); 330 } 331 332 if (lu->size > 1) { /* convert mumps distributed solution to petsc mpi x */ 333 if (!lu->nSolve){ /* create scatter scat_sol */ 334 ierr = ISCreateStride(PETSC_COMM_SELF,lu->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from 
*/ 335 for (i=0; i<lu->id.lsol_loc; i++){ 336 lu->id.isol_loc[i] -= 1; /* change Fortran style to C style */ 337 } 338 ierr = ISCreateGeneral(PETSC_COMM_SELF,lu->id.lsol_loc,lu->id.isol_loc,&is_petsc);CHKERRQ(ierr); /* to */ 339 ierr = VecScatterCreate(lu->x_seq,is_iden,x,is_petsc,&lu->scat_sol);CHKERRQ(ierr); 340 ierr = ISDestroy(is_iden);CHKERRQ(ierr); 341 ierr = ISDestroy(is_petsc);CHKERRQ(ierr); 342 } 343 ierr = VecScatterBegin(lu->x_seq,x,INSERT_VALUES,SCATTER_FORWARD,lu->scat_sol);CHKERRQ(ierr); 344 ierr = VecScatterEnd(lu->x_seq,x,INSERT_VALUES,SCATTER_FORWARD,lu->scat_sol);CHKERRQ(ierr); 345 } 346 lu->nSolve++; 347 PetscFunctionReturn(0); 348 } 349 350 /* 351 input: 352 F: numeric factor 353 output: 354 nneg: total number of negative pivots 355 nzero: 0 356 npos: (global dimension of F) - nneg 357 */ 358 359 #undef __FUNCT__ 360 #define __FUNCT__ "MatGetInertia_SBAIJMUMPS" 361 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos) 362 { 363 Mat_MUMPS *lu =(Mat_MUMPS*)F->spptr; 364 PetscErrorCode ierr; 365 PetscMPIInt size; 366 367 PetscFunctionBegin; 368 ierr = MPI_Comm_size(F->comm,&size);CHKERRQ(ierr); 369 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 370 if (size > 1 && lu->id.ICNTL(13) != 1){ 371 SETERRQ1(PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. 
-mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",lu->id.INFOG(13)); 372 } 373 if (nneg){ 374 if (!lu->myid){ 375 *nneg = lu->id.INFOG(12); 376 } 377 ierr = MPI_Bcast(nneg,1,MPI_INT,0,lu->comm_mumps);CHKERRQ(ierr); 378 } 379 if (nzero) *nzero = 0; 380 if (npos) *npos = F->rmap.N - (*nneg); 381 PetscFunctionReturn(0); 382 } 383 384 #undef __FUNCT__ 385 #define __FUNCT__ "MatFactorNumeric_MUMPS" 386 PetscErrorCode MatFactorNumeric_MUMPS(Mat A,MatFactorInfo *info,Mat *F) 387 { 388 Mat_MUMPS *lu =(Mat_MUMPS*)(*F)->spptr; 389 Mat_MUMPS *lua=(Mat_MUMPS*)(A)->spptr; 390 PetscErrorCode ierr; 391 PetscInt rnz,nnz,nz=0,i,M=A->rmap.N,*ai,*aj,icntl; 392 PetscTruth valOnly,flg; 393 Mat F_diag; 394 395 PetscFunctionBegin; 396 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ 397 (*F)->ops->solve = MatSolve_MUMPS; 398 399 /* Initialize a MUMPS instance */ 400 ierr = MPI_Comm_rank(A->comm, &lu->myid); 401 ierr = MPI_Comm_size(A->comm,&lu->size);CHKERRQ(ierr); 402 lua->myid = lu->myid; lua->size = lu->size; 403 lu->id.job = JOB_INIT; 404 ierr = MPI_Comm_dup(A->comm,&(lu->comm_mumps));CHKERRQ(ierr); 405 ierr = MPICCommToFortranComm(lu->comm_mumps,&(lu->id.comm_fortran));CHKERRQ(ierr); 406 407 /* Set mumps options */ 408 ierr = PetscOptionsBegin(A->comm,A->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 409 lu->id.par=1; /* host participates factorizaton and solve */ 410 lu->id.sym=lu->sym; 411 if (lu->sym == 2){ 412 ierr = PetscOptionsInt("-mat_mumps_sym","SYM: (1,2)","None",lu->id.sym,&icntl,&flg);CHKERRQ(ierr); 413 if (flg && icntl == 1) lu->id.sym=icntl; /* matrix is spd */ 414 } 415 #if defined(PETSC_USE_COMPLEX) 416 zmumps_c(&lu->id); 417 #else 418 dmumps_c(&lu->id); 419 #endif 420 421 if (lu->size == 1){ 422 lu->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 423 } else { 424 lu->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 425 } 426 427 icntl=-1; 428 lu->id.ICNTL(4) = 0; /* level of printing; overwrite mumps default ICNTL(4)=2 */ 
429 ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",lu->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr); 430 if ((flg && icntl > 0) || PetscLogPrintInfo) { 431 lu->id.ICNTL(4)=icntl; /* and use mumps default icntl(i), i=1,2,3 */ 432 } else { /* no output */ 433 lu->id.ICNTL(1) = 0; /* error message, default= 6 */ 434 lu->id.ICNTL(2) = -1; /* output stream for diagnostic printing, statistics, and warning. default=0 */ 435 lu->id.ICNTL(3) = -1; /* output stream for global information, default=6 */ 436 } 437 ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): matrix prescaling (0 to 7)","None",lu->id.ICNTL(6),&lu->id.ICNTL(6),PETSC_NULL);CHKERRQ(ierr); 438 icntl=-1; 439 ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): matrix ordering (0 to 7)","None",lu->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr); 440 if (flg) { 441 if (icntl== 1){ 442 SETERRQ(PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n"); 443 } else { 444 lu->id.ICNTL(7) = icntl; 445 } 446 } 447 ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): A or A^T x=b to be solved. 
1: A; otherwise: A^T","None",lu->id.ICNTL(9),&lu->id.ICNTL(9),PETSC_NULL);CHKERRQ(ierr); 448 ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",lu->id.ICNTL(10),&lu->id.ICNTL(10),PETSC_NULL);CHKERRQ(ierr); 449 ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): error analysis, a positive value returns statistics (by -ksp_view)","None",lu->id.ICNTL(11),&lu->id.ICNTL(11),PETSC_NULL);CHKERRQ(ierr); 450 ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): efficiency control","None",lu->id.ICNTL(12),&lu->id.ICNTL(12),PETSC_NULL);CHKERRQ(ierr); 451 ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): efficiency control","None",lu->id.ICNTL(13),&lu->id.ICNTL(13),PETSC_NULL);CHKERRQ(ierr); 452 ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage of estimated workspace increase","None",lu->id.ICNTL(14),&lu->id.ICNTL(14),PETSC_NULL);CHKERRQ(ierr); 453 ierr = PetscOptionsInt("-mat_mumps_icntl_15","ICNTL(15): efficiency control","None",lu->id.ICNTL(15),&lu->id.ICNTL(15),PETSC_NULL);CHKERRQ(ierr); 454 455 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",lu->id.CNTL(1),&lu->id.CNTL(1),PETSC_NULL);CHKERRQ(ierr); 456 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",lu->id.CNTL(2),&lu->id.CNTL(2),PETSC_NULL);CHKERRQ(ierr); 457 ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",lu->id.CNTL(3),&lu->id.CNTL(3),PETSC_NULL);CHKERRQ(ierr); 458 ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",lu->id.CNTL(4),&lu->id.CNTL(4),PETSC_NULL);CHKERRQ(ierr); 459 PetscOptionsEnd(); 460 } 461 462 /* define matrix A */ 463 switch (lu->id.ICNTL(18)){ 464 case 0: /* centralized assembled matrix input (size=1) */ 465 if (!lu->myid) { 466 if (lua->isAIJ){ 467 Mat_SeqAIJ *aa = (Mat_SeqAIJ*)A->data; 468 nz = aa->nz; 469 ai = aa->i; aj = aa->j; lu->val = aa->a; 470 } else { 471 
Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)A->data; 472 nz = aa->nz; 473 ai = aa->i; aj = aa->j; lu->val = aa->a; 474 } 475 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization, get irn and jcn */ 476 ierr = PetscMalloc(nz*sizeof(PetscInt),&lu->irn);CHKERRQ(ierr); 477 ierr = PetscMalloc(nz*sizeof(PetscInt),&lu->jcn);CHKERRQ(ierr); 478 nz = 0; 479 for (i=0; i<M; i++){ 480 rnz = ai[i+1] - ai[i]; 481 while (rnz--) { /* Fortran row/col index! */ 482 lu->irn[nz] = i+1; lu->jcn[nz] = (*aj)+1; aj++; nz++; 483 } 484 } 485 } 486 } 487 break; 488 case 3: /* distributed assembled matrix input (size>1) */ 489 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ 490 valOnly = PETSC_FALSE; 491 } else { 492 valOnly = PETSC_TRUE; /* only update mat values, not row and col index */ 493 } 494 ierr = MatConvertToTriples(A,1,valOnly, &nnz, &lu->irn, &lu->jcn, &lu->val);CHKERRQ(ierr); 495 break; 496 default: SETERRQ(PETSC_ERR_SUP,"Matrix input format is not supported by MUMPS."); 497 } 498 499 /* analysis phase */ 500 /*----------------*/ 501 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ 502 lu->id.job = 1; 503 504 lu->id.n = M; 505 switch (lu->id.ICNTL(18)){ 506 case 0: /* centralized assembled matrix input */ 507 if (!lu->myid) { 508 lu->id.nz =nz; lu->id.irn=lu->irn; lu->id.jcn=lu->jcn; 509 if (lu->id.ICNTL(6)>1){ 510 #if defined(PETSC_USE_COMPLEX) 511 lu->id.a = (mumps_double_complex*)lu->val; 512 #else 513 lu->id.a = lu->val; 514 #endif 515 } 516 } 517 break; 518 case 3: /* distributed assembled matrix input (size>1) */ 519 lu->id.nz_loc = nnz; 520 lu->id.irn_loc=lu->irn; lu->id.jcn_loc=lu->jcn; 521 if (lu->id.ICNTL(6)>1) { 522 #if defined(PETSC_USE_COMPLEX) 523 lu->id.a_loc = (mumps_double_complex*)lu->val; 524 #else 525 lu->id.a_loc = lu->val; 526 #endif 527 } 528 /* MUMPS only supports centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 529 IS is_iden; 530 Vec b; 531 if (!lu->myid){ 532 ierr = VecCreateSeq(PETSC_COMM_SELF,A->cmap.N,&lu->b_seq);CHKERRQ(ierr); 533 ierr = ISCreateStride(PETSC_COMM_SELF,A->cmap.N,0,1,&is_iden);CHKERRQ(ierr); 534 } else { 535 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&lu->b_seq);CHKERRQ(ierr); 536 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_iden);CHKERRQ(ierr); 537 } 538 ierr = VecCreate(A->comm,&b);CHKERRQ(ierr); 539 ierr = VecSetSizes(b,A->rmap.n,PETSC_DECIDE);CHKERRQ(ierr); 540 ierr = VecSetFromOptions(b);CHKERRQ(ierr); 541 542 ierr = VecScatterCreate(b,is_iden,lu->b_seq,is_iden,&lu->scat_rhs);CHKERRQ(ierr); 543 ierr = ISDestroy(is_iden);CHKERRQ(ierr); 544 ierr = VecDestroy(b);CHKERRQ(ierr); 545 break; 546 } 547 #if defined(PETSC_USE_COMPLEX) 548 zmumps_c(&lu->id); 549 #else 550 dmumps_c(&lu->id); 551 #endif 552 if (lu->id.INFOG(1) < 0) { 553 SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",lu->id.INFOG(1)); 554 } 555 } 556 557 /* numerical factorization phase */ 558 /*-------------------------------*/ 559 lu->id.job = 2; 560 if(!lu->id.ICNTL(18)) { 561 if (!lu->myid) { 562 #if defined(PETSC_USE_COMPLEX) 563 lu->id.a = (mumps_double_complex*)lu->val; 564 #else 565 lu->id.a = lu->val; 566 #endif 567 } 568 } else { 569 #if defined(PETSC_USE_COMPLEX) 570 lu->id.a_loc = (mumps_double_complex*)lu->val; 571 #else 572 lu->id.a_loc = lu->val; 573 #endif 574 } 575 #if defined(PETSC_USE_COMPLEX) 576 zmumps_c(&lu->id); 577 #else 578 dmumps_c(&lu->id); 579 #endif 580 if (lu->id.INFOG(1) < 0) { 581 if (lu->id.INFO(1) == -13) { 582 SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: Cannot allocate required memory %d megabytes\n",lu->id.INFO(2)); 583 } else { 584 SETERRQ2(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFO(1)=%d, INFO(2)=%d\n",lu->id.INFO(1),lu->id.INFO(2)); 585 } 586 } 587 588 if (!lu->myid && lu->id.ICNTL(16) > 0){ 
589 SETERRQ1(PETSC_ERR_LIB," lu->id.ICNTL(16):=%d\n",lu->id.INFOG(16)); 590 } 591 592 if (lu->size > 1){ 593 if ((*F)->factor == FACTOR_LU){ 594 F_diag = ((Mat_MPIAIJ *)(*F)->data)->A; 595 } else { 596 F_diag = ((Mat_MPISBAIJ *)(*F)->data)->A; 597 } 598 F_diag->assembled = PETSC_TRUE; 599 if (lu->nSolve){ 600 ierr = VecScatterDestroy(lu->scat_sol);CHKERRQ(ierr); 601 ierr = PetscFree(lu->id.sol_loc);CHKERRQ(ierr); 602 ierr = VecDestroy(lu->x_seq);CHKERRQ(ierr); 603 } 604 } 605 (*F)->assembled = PETSC_TRUE; 606 lu->matstruc = SAME_NONZERO_PATTERN; 607 lu->CleanUpMUMPS = PETSC_TRUE; 608 lu->nSolve = 0; 609 PetscFunctionReturn(0); 610 } 611 612 /* Note the Petsc r and c permutations are ignored */ 613 #undef __FUNCT__ 614 #define __FUNCT__ "MatLUFactorSymbolic_AIJMUMPS" 615 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) { 616 Mat B; 617 Mat_MUMPS *lu; 618 PetscErrorCode ierr; 619 620 PetscFunctionBegin; 621 /* Create the factorization matrix */ 622 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 623 ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr); 624 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 625 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 626 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 627 628 B->ops->lufactornumeric = MatFactorNumeric_MUMPS; 629 B->factor = FACTOR_LU; 630 lu = (Mat_MUMPS*)B->spptr; 631 lu->sym = 0; 632 lu->matstruc = DIFFERENT_NONZERO_PATTERN; 633 634 *F = B; 635 PetscFunctionReturn(0); 636 } 637 638 /* Note the Petsc r permutation is ignored */ 639 #undef __FUNCT__ 640 #define __FUNCT__ "MatCholeskyFactorSymbolic_SBAIJMUMPS" 641 PetscErrorCode MatCholeskyFactorSymbolic_SBAIJMUMPS(Mat A,IS r,MatFactorInfo *info,Mat *F) { 642 Mat B; 643 Mat_MUMPS *lu; 644 PetscErrorCode ierr; 645 646 PetscFunctionBegin; 647 /* Create the factorization matrix */ 648 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 649 ierr = 
MatSetSizes(B,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr); 650 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 651 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 652 ierr = MatMPISBAIJSetPreallocation(B,1,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 653 654 B->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 655 B->ops->getinertia = MatGetInertia_SBAIJMUMPS; 656 B->factor = FACTOR_CHOLESKY; 657 lu = (Mat_MUMPS*)B->spptr; 658 lu->sym = 2; 659 lu->matstruc = DIFFERENT_NONZERO_PATTERN; 660 661 *F = B; 662 PetscFunctionReturn(0); 663 } 664 665 #undef __FUNCT__ 666 #define __FUNCT__ "MatFactorInfo_MUMPS" 667 PetscErrorCode MatFactorInfo_MUMPS(Mat A,PetscViewer viewer) { 668 Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr; 669 PetscErrorCode ierr; 670 671 PetscFunctionBegin; 672 /* check if matrix is mumps type */ 673 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0); 674 675 ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr); 676 ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",lu->id.sym);CHKERRQ(ierr); 677 ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",lu->id.par);CHKERRQ(ierr); 678 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d \n",lu->id.ICNTL(1));CHKERRQ(ierr); 679 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg):%d \n",lu->id.ICNTL(2));CHKERRQ(ierr); 680 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d \n",lu->id.ICNTL(3));CHKERRQ(ierr); 681 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",lu->id.ICNTL(4));CHKERRQ(ierr); 682 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",lu->id.ICNTL(5));CHKERRQ(ierr); 683 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",lu->id.ICNTL(6));CHKERRQ(ierr); 684 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (matrix ordering): %d \n",lu->id.ICNTL(7));CHKERRQ(ierr); 685 ierr = 
PetscViewerASCIIPrintf(viewer," ICNTL(8) (scalling strategy): %d \n",lu->id.ICNTL(8));CHKERRQ(ierr); 686 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(9) (A/A^T x=b is solved): %d \n",lu->id.ICNTL(9));CHKERRQ(ierr); 687 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",lu->id.ICNTL(10));CHKERRQ(ierr); 688 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",lu->id.ICNTL(11));CHKERRQ(ierr); 689 if (!lu->myid && lu->id.ICNTL(11)>0) { 690 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(4) (inf norm of input mat): %g\n",lu->id.RINFOG(4));CHKERRQ(ierr); 691 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(5) (inf norm of solution): %g\n",lu->id.RINFOG(5));CHKERRQ(ierr); 692 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(6) (inf norm of residual): %g\n",lu->id.RINFOG(6));CHKERRQ(ierr); 693 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",lu->id.RINFOG(7),lu->id.RINFOG(8));CHKERRQ(ierr); 694 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(9) (error estimate): %g \n",lu->id.RINFOG(9));CHKERRQ(ierr); 695 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",lu->id.RINFOG(10),lu->id.RINFOG(11));CHKERRQ(ierr); 696 697 } 698 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",lu->id.ICNTL(12));CHKERRQ(ierr); 699 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (efficiency control): %d \n",lu->id.ICNTL(13));CHKERRQ(ierr); 700 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",lu->id.ICNTL(14));CHKERRQ(ierr); 701 /* ICNTL(15-17) not used */ 702 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",lu->id.ICNTL(18));CHKERRQ(ierr); 703 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(19) (Shur complement info): %d \n",lu->id.ICNTL(19));CHKERRQ(ierr); 704 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(20) (rhs sparse pattern): %d \n",lu->id.ICNTL(20));CHKERRQ(ierr); 705 ierr = 
PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d \n",lu->id.ICNTL(21));CHKERRQ(ierr); 706 707 ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",lu->id.CNTL(1));CHKERRQ(ierr); 708 ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",lu->id.CNTL(2));CHKERRQ(ierr); 709 ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",lu->id.CNTL(3));CHKERRQ(ierr); 710 ierr = PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",lu->id.CNTL(4));CHKERRQ(ierr); 711 712 /* infomation local to each processor */ 713 if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr);} 714 ierr = PetscSynchronizedPrintf(A->comm," [%d] %g \n",lu->myid,lu->id.RINFO(1));CHKERRQ(ierr); 715 ierr = PetscSynchronizedFlush(A->comm); 716 if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr);} 717 ierr = PetscSynchronizedPrintf(A->comm," [%d] %g \n",lu->myid,lu->id.RINFO(2));CHKERRQ(ierr); 718 ierr = PetscSynchronizedFlush(A->comm); 719 if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr);} 720 ierr = PetscSynchronizedPrintf(A->comm," [%d] %g \n",lu->myid,lu->id.RINFO(3));CHKERRQ(ierr); 721 ierr = PetscSynchronizedFlush(A->comm); 722 /* 723 if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, " INFO(2) (info about error or warning ): \n");CHKERRQ(ierr);} 724 ierr = PetscSynchronizedPrintf(A->comm," [%d] %d \n",lu->myid,lu->id.INFO(2));CHKERRQ(ierr); 725 ierr = PetscSynchronizedFlush(A->comm); 726 */ 727 728 if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr);} 729 ierr = 
PetscSynchronizedPrintf(A->comm," [%d] %d \n",lu->myid,lu->id.INFO(15));CHKERRQ(ierr); 730 ierr = PetscSynchronizedFlush(A->comm); 731 732 if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr);} 733 ierr = PetscSynchronizedPrintf(A->comm," [%d] %d \n",lu->myid,lu->id.INFO(16));CHKERRQ(ierr); 734 ierr = PetscSynchronizedFlush(A->comm); 735 736 if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr);} 737 ierr = PetscSynchronizedPrintf(A->comm," [%d] %d \n",lu->myid,lu->id.INFO(23));CHKERRQ(ierr); 738 ierr = PetscSynchronizedFlush(A->comm); 739 740 if (!lu->myid){ /* information from the host */ 741 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",lu->id.RINFOG(1));CHKERRQ(ierr); 742 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",lu->id.RINFOG(2));CHKERRQ(ierr); 743 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",lu->id.RINFOG(3));CHKERRQ(ierr); 744 745 ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(3));CHKERRQ(ierr); 746 ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(4));CHKERRQ(ierr); 747 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",lu->id.INFOG(5));CHKERRQ(ierr); 748 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",lu->id.INFOG(6));CHKERRQ(ierr); 749 ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively uese after analysis): %d 
\n",lu->id.INFOG(7));CHKERRQ(ierr); 750 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",lu->id.INFOG(8));CHKERRQ(ierr); 751 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",lu->id.INFOG(9));CHKERRQ(ierr); 752 ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d \n",lu->id.INFOG(10));CHKERRQ(ierr); 753 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d \n",lu->id.INFOG(11));CHKERRQ(ierr); 754 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",lu->id.INFOG(12));CHKERRQ(ierr); 755 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",lu->id.INFOG(13));CHKERRQ(ierr); 756 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",lu->id.INFOG(14));CHKERRQ(ierr); 757 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",lu->id.INFOG(15));CHKERRQ(ierr); 758 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",lu->id.INFOG(16));CHKERRQ(ierr); 759 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",lu->id.INFOG(17));CHKERRQ(ierr); 760 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",lu->id.INFOG(18));CHKERRQ(ierr); 761 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all 
processors): %d \n",lu->id.INFOG(19));CHKERRQ(ierr); 762 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",lu->id.INFOG(20));CHKERRQ(ierr); 763 ierr = PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",lu->id.INFOG(21));CHKERRQ(ierr); 764 ierr = PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",lu->id.INFOG(22));CHKERRQ(ierr); 765 ierr = PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",lu->id.INFOG(23));CHKERRQ(ierr); 766 ierr = PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",lu->id.INFOG(24));CHKERRQ(ierr); 767 ierr = PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",lu->id.INFOG(25));CHKERRQ(ierr); 768 } 769 770 PetscFunctionReturn(0); 771 } 772 773 #undef __FUNCT__ 774 #define __FUNCT__ "MatView_MUMPS" 775 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) { 776 PetscErrorCode ierr; 777 PetscTruth iascii; 778 PetscViewerFormat format; 779 Mat_MUMPS *mumps=(Mat_MUMPS*)(A->spptr); 780 781 PetscFunctionBegin; 782 ierr = (*mumps->MatView)(A,viewer);CHKERRQ(ierr); 783 784 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 785 if (iascii) { 786 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 787 if (format == PETSC_VIEWER_ASCII_INFO){ 788 ierr = MatFactorInfo_MUMPS(A,viewer);CHKERRQ(ierr); 789 } 790 } 791 PetscFunctionReturn(0); 792 } 793 794 #undef __FUNCT__ 795 #define __FUNCT__ "MatAssemblyEnd_AIJMUMPS" 796 PetscErrorCode MatAssemblyEnd_AIJMUMPS(Mat A,MatAssemblyType mode) { 797 PetscErrorCode ierr; 798 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 799 800 PetscFunctionBegin; 801 ierr = 
(*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 802 803 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 804 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 805 A->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 806 PetscFunctionReturn(0); 807 } 808 809 EXTERN_C_BEGIN 810 #undef __FUNCT__ 811 #define __FUNCT__ "MatConvert_AIJ_AIJMUMPS" 812 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_AIJ_AIJMUMPS(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 813 { 814 PetscErrorCode ierr; 815 PetscMPIInt size; 816 MPI_Comm comm; 817 Mat B=*newmat; 818 Mat_MUMPS *mumps; 819 820 PetscFunctionBegin; 821 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 822 ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr); 823 824 if (reuse == MAT_INITIAL_MATRIX) { 825 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 826 /* A may have special container that is not duplicated, 827 e.g., A is obtainted from MatMatMult(,&A). Save B->ops instead */ 828 mumps->MatDuplicate = B->ops->duplicate; 829 mumps->MatView = B->ops->view; 830 mumps->MatAssemblyEnd = B->ops->assemblyend; 831 mumps->MatLUFactorSymbolic = B->ops->lufactorsymbolic; 832 mumps->MatCholeskyFactorSymbolic = B->ops->choleskyfactorsymbolic; 833 mumps->MatDestroy = B->ops->destroy; 834 } else { 835 mumps->MatDuplicate = A->ops->duplicate; 836 mumps->MatView = A->ops->view; 837 mumps->MatAssemblyEnd = A->ops->assemblyend; 838 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 839 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 840 mumps->MatDestroy = A->ops->destroy; 841 } 842 mumps->specialdestroy = MatDestroy_AIJMUMPS; 843 mumps->CleanUpMUMPS = PETSC_FALSE; 844 mumps->isAIJ = PETSC_TRUE; 845 846 B->spptr = (void*)mumps; 847 B->ops->duplicate = MatDuplicate_MUMPS; 848 B->ops->view = MatView_MUMPS; 849 B->ops->assemblyend = MatAssemblyEnd_AIJMUMPS; 850 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 851 B->ops->destroy = MatDestroy_MUMPS; 852 853 ierr = 
MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 854 if (size == 1) { 855 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_aijmumps_C", 856 "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr); 857 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_seqaij_C", 858 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 859 } else { 860 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_aijmumps_C", 861 "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr); 862 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_mpiaij_C", 863 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 864 } 865 866 ierr = PetscInfo(0,"Using MUMPS for LU factorization and solves.\n");CHKERRQ(ierr); 867 ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr); 868 *newmat = B; 869 PetscFunctionReturn(0); 870 } 871 EXTERN_C_END 872 873 /*MC 874 MATAIJMUMPS - MATAIJMUMPS = "aijmumps" - A matrix type providing direct solvers (LU) for distributed 875 and sequential matrices via the external package MUMPS. 876 877 If MUMPS is installed (see the manual for instructions 878 on how to declare the existence of external packages), 879 a matrix type can be constructed which invokes MUMPS solvers. 880 After calling MatCreate(...,A), simply call MatSetType(A,MATAIJMUMPS). 881 882 If created with a single process communicator, this matrix type inherits from MATSEQAIJ. 883 Otherwise, this matrix type inherits from MATMPIAIJ. Hence for single process communicators, 884 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 885 for communicators controlling multiple processes. It is recommended that you call both of 886 the above preallocation routines for simplicity. 
One can also call MatConvert for an in-place
  conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
  without data copy.

  Options Database Keys:
+ -mat_type aijmumps - sets the matrix type to "aijmumps" during a call to MatSetFromOptions()
. -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric
. -mat_mumps_icntl_4 <0,1,2,3,4> - print level
. -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide)
. -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide)
. -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T
. -mat_mumps_icntl_10 <n> - maximum number of iterative refinements
. -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view
. -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide)
. -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide)
. -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide)
. -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide)
. -mat_mumps_cntl_1 <delta> - relative pivoting threshold
. 
-mat_mumps_cntl_2 <tol> - stopping criterion for refinement 905 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold 906 907 Level: beginner 908 909 .seealso: MATSBAIJMUMPS 910 M*/ 911 912 EXTERN_C_BEGIN 913 #undef __FUNCT__ 914 #define __FUNCT__ "MatCreate_AIJMUMPS" 915 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_AIJMUMPS(Mat A) 916 { 917 PetscErrorCode ierr; 918 PetscMPIInt size; 919 920 PetscFunctionBegin; 921 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 922 if (size == 1) { 923 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 924 } else { 925 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 926 /* 927 Mat A_diag = ((Mat_MPIAIJ *)A->data)->A; 928 ierr = MatConvert_AIJ_AIJMUMPS(A_diag,MATAIJMUMPS,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr); 929 */ 930 } 931 ierr = MatConvert_AIJ_AIJMUMPS(A,MATAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 932 PetscFunctionReturn(0); 933 } 934 EXTERN_C_END 935 936 #undef __FUNCT__ 937 #define __FUNCT__ "MatAssemblyEnd_SBAIJMUMPS" 938 PetscErrorCode MatAssemblyEnd_SBAIJMUMPS(Mat A,MatAssemblyType mode) 939 { 940 PetscErrorCode ierr; 941 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 942 943 PetscFunctionBegin; 944 ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 945 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 946 A->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SBAIJMUMPS; 947 PetscFunctionReturn(0); 948 } 949 950 EXTERN_C_BEGIN 951 #undef __FUNCT__ 952 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS" 953 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJMUMPS(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 954 { 955 Mat A; 956 Mat_MUMPS *mumps=(Mat_MUMPS*)B->spptr; 957 PetscErrorCode ierr; 958 959 PetscFunctionBegin; 960 /* 961 After performing the MPISBAIJ Preallocation, we need to convert the local diagonal block matrix 962 into MUMPS type so that the block jacobi preconditioner (for example) can use MUMPS. 
I would 963 like this to be done in the MatCreate routine, but the creation of this inner matrix requires 964 block size info so that PETSc can determine the local size properly. The block size info is set 965 in the preallocation routine. 966 */ 967 ierr = (*mumps->MatPreallocate)(B,bs,d_nz,d_nnz,o_nz,o_nnz); 968 A = ((Mat_MPISBAIJ *)B->data)->A; 969 ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 970 PetscFunctionReturn(0); 971 } 972 EXTERN_C_END 973 974 EXTERN_C_BEGIN 975 #undef __FUNCT__ 976 #define __FUNCT__ "MatConvert_SBAIJ_SBAIJMUMPS" 977 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SBAIJ_SBAIJMUMPS(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 978 { 979 PetscErrorCode ierr; 980 PetscMPIInt size; 981 MPI_Comm comm; 982 Mat B=*newmat; 983 Mat_MUMPS *mumps; 984 void (*f)(void); 985 986 PetscFunctionBegin; 987 if (reuse == MAT_INITIAL_MATRIX) { 988 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 989 } 990 991 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 992 ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr); 993 994 mumps->MatDuplicate = A->ops->duplicate; 995 mumps->MatView = A->ops->view; 996 mumps->MatAssemblyEnd = A->ops->assemblyend; 997 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 998 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 999 mumps->MatDestroy = A->ops->destroy; 1000 mumps->specialdestroy = MatDestroy_SBAIJMUMPS; 1001 mumps->CleanUpMUMPS = PETSC_FALSE; 1002 mumps->isAIJ = PETSC_FALSE; 1003 1004 B->spptr = (void*)mumps; 1005 B->ops->duplicate = MatDuplicate_MUMPS; 1006 B->ops->view = MatView_MUMPS; 1007 B->ops->assemblyend = MatAssemblyEnd_SBAIJMUMPS; 1008 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SBAIJMUMPS; 1009 B->ops->destroy = MatDestroy_MUMPS; 1010 1011 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 1012 if (size == 1) { 1013 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C", 1014 "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr); 1015 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C", 1016 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 1017 } else { 1018 /* I really don't like needing to know the tag: MatMPISBAIJSetPreallocation_C */ 1019 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(PetscVoidStarFunction)&f);CHKERRQ(ierr); 1020 if (f) { /* This case should always be true when this routine is called */ 1021 mumps->MatPreallocate = (PetscErrorCode (*)(Mat,int,int,int*,int,int*))f; 1022 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 1023 "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS", 1024 MatMPISBAIJSetPreallocation_MPISBAIJMUMPS);CHKERRQ(ierr); 1025 } 1026 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C", 1027 "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr); 1028 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C", 1029 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 1030 } 1031 1032 ierr = PetscInfo(0,"Using MUMPS for Cholesky factorization and solves.\n");CHKERRQ(ierr); 1033 ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr); 1034 *newmat = B; 1035 PetscFunctionReturn(0); 1036 } 1037 EXTERN_C_END 1038 1039 #undef __FUNCT__ 1040 #define __FUNCT__ "MatDuplicate_MUMPS" 1041 PetscErrorCode MatDuplicate_MUMPS(Mat A, MatDuplicateOption op, Mat *M) { 1042 PetscErrorCode ierr; 1043 Mat_MUMPS *lu=(Mat_MUMPS *)A->spptr; 1044 1045 PetscFunctionBegin; 1046 ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr); 1047 ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_MUMPS));CHKERRQ(ierr); 1048 PetscFunctionReturn(0); 1049 } 1050 1051 /*MC 1052 MATSBAIJMUMPS - MATSBAIJMUMPS = "sbaijmumps" 
- A symmetric matrix type providing direct solvers (Cholesky) for
  distributed and sequential matrices via the external package MUMPS.

  If MUMPS is installed (see the manual for instructions
  on how to declare the existence of external packages),
  a matrix type can be constructed which invokes MUMPS solvers.
  After calling MatCreate(...,A), simply call MatSetType(A,MATSBAIJMUMPS).

  If created with a single process communicator, this matrix type inherits from MATSEQSBAIJ.
  Otherwise, this matrix type inherits from MATMPISBAIJ. Hence for single process communicators,
  MatSeqSBAIJSetPreallocation is supported, and similarly MatMPISBAIJSetPreallocation is supported
  for communicators controlling multiple processes. It is recommended that you call both of
  the above preallocation routines for simplicity. One can also call MatConvert for an in-place
  conversion to or from the MATSEQSBAIJ or MATMPISBAIJ type (depending on the communicator size)
  without data copy.

  Options Database Keys:
+ -mat_type sbaijmumps - sets the matrix type to "sbaijmumps" during a call to MatSetFromOptions()
. -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric
. -mat_mumps_icntl_4 <0,...,4> - print level
. -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide)
. -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide)
. -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T
. -mat_mumps_icntl_10 <n> - maximum number of iterative refinements
. -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view
. -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide)
. -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide)
. -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide)
. 
-mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide) 1081 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold 1082 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement 1083 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold 1084 1085 Level: beginner 1086 1087 .seealso: MATAIJMUMPS 1088 M*/ 1089 1090 EXTERN_C_BEGIN 1091 #undef __FUNCT__ 1092 #define __FUNCT__ "MatCreate_SBAIJMUMPS" 1093 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJMUMPS(Mat A) 1094 { 1095 PetscErrorCode ierr; 1096 PetscMPIInt size; 1097 1098 PetscFunctionBegin; 1099 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 1100 if (size == 1) { 1101 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1102 } else { 1103 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1104 } 1105 ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 1106 PetscFunctionReturn(0); 1107 } 1108 EXTERN_C_END 1109