/*
    Provides an interface to the MUMPS sparse solver
*/
#include <petscpkg_version.h>
#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sell/mpi/mpisell.h>

EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#include <cmumps_c.h>
#else
#include <zmumps_c.h>
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#include <smumps_c.h>
#else
#include <dmumps_c.h>
#endif
#endif
EXTERN_C_END
#define JOB_INIT         -1
#define JOB_FACTSYMBOLIC 1
#define JOB_FACTNUMERIC  2
#define JOB_SOLVE        3
#define JOB_END          -2

/* calls to MUMPS */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c cmumps_c
#else
#define MUMPS_c zmumps_c
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c smumps_c
#else
#define MUMPS_c dmumps_c
#endif
#endif

/* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc, and uses int64_t for
   numbers of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
   naming convention of PetscMPIInt, PetscBLASInt etc.
*/
typedef MUMPS_INT PetscMUMPSInt;

#if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
#if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is defined in the MUMPS headers when MUMPS is built in full 64-bit mode, so this macro is the more reliable check */
#error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
#endif
#else
#if defined(INTSIZE64) /* INTSIZE64 is a command-line macro one uses to build MUMPS in full 64-bit mode */
#error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
#endif
#endif

#define MPIU_MUMPSINT       MPI_INT
#define PETSC_MUMPS_INT_MAX 2147483647
#define PETSC_MUMPS_INT_MIN -2147483648

/* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is a row/col index or some other small integer */
static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a,PetscMUMPSInt *b)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_64BIT_INDICES)) {
    PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
  }
  *b = (PetscMUMPSInt)(a);
  PetscFunctionReturn(0);
}

/* Put these utility routines here since they are only used in this file */
static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject,const char opt[],const char text[],const char man[],PetscMUMPSInt currentvalue,PetscMUMPSInt *value,PetscBool *set,PetscMUMPSInt lb,PetscMUMPSInt ub)
{
  PetscErrorCode ierr;
  PetscInt       myval;
  PetscBool      myset;
  PetscFunctionBegin;
  /* PetscInt's size should always be >= PetscMUMPSInt's, so it is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
  ierr = PetscOptionsInt_Private(PetscOptionsObject,opt,text,man,(PetscInt)currentvalue,&myval,&myset,lb,ub);CHKERRQ(ierr);
  if (myset) {ierr = PetscMUMPSIntCast(myval,value);CHKERRQ(ierr);}
  if (set) *set = myset;
  PetscFunctionReturn(0);
}
#define PetscOptionsMUMPSInt(a,b,c,d,e,f) PetscOptionsMUMPSInt_Private(PetscOptionsObject,a,b,c,d,e,f,PETSC_MUMPS_INT_MIN,PETSC_MUMPS_INT_MAX)

/* if using PETSc OpenMP support, we only call MUMPS on master ranks.
Before/after the call, we change/restore CPUs the master ranks can run on */ 93 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 94 #define PetscMUMPS_c(mumps) \ 95 do { \ 96 if (mumps->use_petsc_omp_support) { \ 97 if (mumps->is_omp_master) { \ 98 ierr = PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl);CHKERRQ(ierr); \ 99 MUMPS_c(&mumps->id); \ 100 ierr = PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl);CHKERRQ(ierr); \ 101 } \ 102 ierr = PetscOmpCtrlBarrier(mumps->omp_ctrl);CHKERRQ(ierr); \ 103 /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \ 104 to processes, so we only Bcast info[1], an error code and leave others (since they do not have \ 105 an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \ 106 omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \ 107 */ \ 108 ierr = MPI_Bcast(mumps->id.infog, 40,MPIU_MUMPSINT, 0,mumps->omp_comm);CHKERRMPI(ierr);\ 109 ierr = MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL, 0,mumps->omp_comm);CHKERRMPI(ierr);\ 110 ierr = MPI_Bcast(mumps->id.info, 1, MPIU_MUMPSINT, 0,mumps->omp_comm);CHKERRMPI(ierr);\ 111 } else { \ 112 MUMPS_c(&mumps->id); \ 113 } \ 114 } while (0) 115 #else 116 #define PetscMUMPS_c(mumps) \ 117 do { MUMPS_c(&mumps->id); } while (0) 118 #endif 119 120 /* declare MumpsScalar */ 121 #if defined(PETSC_USE_COMPLEX) 122 #if defined(PETSC_USE_REAL_SINGLE) 123 #define MumpsScalar mumps_complex 124 #else 125 #define MumpsScalar mumps_double_complex 126 #endif 127 #else 128 #define MumpsScalar PetscScalar 129 #endif 130 131 /* macros s.t. indices match MUMPS documentation */ 132 #define ICNTL(I) icntl[(I)-1] 133 #define CNTL(I) cntl[(I)-1] 134 #define INFOG(I) infog[(I)-1] 135 #define INFO(I) info[(I)-1] 136 #define RINFOG(I) rinfog[(I)-1] 137 #define RINFO(I) rinfo[(I)-1] 138 139 typedef struct Mat_MUMPS Mat_MUMPS; 140 struct Mat_MUMPS { 141 #if defined(PETSC_USE_COMPLEX) 142 #if defined(PETSC_USE_REAL_SINGLE) 143 CMUMPS_STRUC_C id; 144 #else 145 ZMUMPS_STRUC_C id; 146 #endif 147 #else 148 #if defined(PETSC_USE_REAL_SINGLE) 149 SMUMPS_STRUC_C id; 150 #else 151 DMUMPS_STRUC_C id; 152 #endif 153 #endif 154 155 MatStructure matstruc; 156 PetscMPIInt myid,petsc_size; 157 PetscMUMPSInt *irn,*jcn; /* the (i,j,v) triplets passed to mumps. */ 158 PetscScalar *val,*val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */ 159 PetscInt64 nnz; /* number of nonzeros. 
The type is called selective 64-bit in MUMPS */
  PetscMUMPSInt  sym;
  MPI_Comm       mumps_comm;
  PetscMUMPSInt  ICNTL9_pre;          /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter     scat_rhs, scat_sol;  /* used by MatSolve() */
  PetscMUMPSInt  ICNTL20;             /* use centralized (0) or distributed (10) dense RHS */
  PetscMUMPSInt  lrhs_loc,nloc_rhs,*irhs_loc;
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  PetscInt       *rhs_nrow,max_nrhs;
  PetscMPIInt    *rhs_recvcounts,*rhs_disps;
  PetscScalar    *rhs_loc,*rhs_recvbuf;
#endif
  Vec            b_seq,x_seq;
  PetscInt       ninfo,*info;         /* which INFO to display */
  PetscInt       sizeredrhs;
  PetscScalar    *schur_sol;
  PetscInt       schur_sizesol;
  PetscMUMPSInt  *ia_alloc,*ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
  PetscInt64     cur_ilen,cur_jlen;   /* current lengths of ia_alloc[], ja_alloc[] */
  PetscErrorCode (*ConvertToTriples)(Mat,PetscInt,MatReuse,Mat_MUMPS*);

  /* stuff used by petsc/mumps OpenMP support */
  PetscBool      use_petsc_omp_support;
  PetscOmpCtrl   omp_ctrl;            /* an OpenMP controller with which blocked processes release their CPUs (MPI_Barrier does not have this guarantee) */
  MPI_Comm       petsc_comm,omp_comm; /* petsc_comm is the petsc matrix's comm */
  PetscInt64     *recvcount;          /* nnz of each rank in omp_comm, collected on omp_master */
  PetscMPIInt    tag,omp_comm_size;
  PetscBool      is_omp_master;       /* is this rank the master of omp_comm? */
  MPI_Request    *reqs;
};

/* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
   Here, nrow is the number of rows, ia[] is the row pointer and ja[] holds the column indices.
*/
static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps,PetscInt nrow,PetscInt *ia,PetscInt *ja,PetscMUMPSInt **ia_mumps,PetscMUMPSInt **ja_mumps,PetscMUMPSInt *nnz_mumps)
{
  PetscErrorCode ierr;
  PetscInt       nnz=ia[nrow]-1; /* mumps uses 1-based indices.
Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */ 197 198 PetscFunctionBegin; 199 #if defined(PETSC_USE_64BIT_INDICES) 200 { 201 PetscInt i; 202 if (nrow+1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */ 203 ierr = PetscFree(mumps->ia_alloc);CHKERRQ(ierr); 204 ierr = PetscMalloc1(nrow+1,&mumps->ia_alloc);CHKERRQ(ierr); 205 mumps->cur_ilen = nrow+1; 206 } 207 if (nnz > mumps->cur_jlen) { 208 ierr = PetscFree(mumps->ja_alloc);CHKERRQ(ierr); 209 ierr = PetscMalloc1(nnz,&mumps->ja_alloc);CHKERRQ(ierr); 210 mumps->cur_jlen = nnz; 211 } 212 for (i=0; i<nrow+1; i++) {ierr = PetscMUMPSIntCast(ia[i],&(mumps->ia_alloc[i]));CHKERRQ(ierr);} 213 for (i=0; i<nnz; i++) {ierr = PetscMUMPSIntCast(ja[i],&(mumps->ja_alloc[i]));CHKERRQ(ierr);} 214 *ia_mumps = mumps->ia_alloc; 215 *ja_mumps = mumps->ja_alloc; 216 } 217 #else 218 *ia_mumps = ia; 219 *ja_mumps = ja; 220 #endif 221 ierr = PetscMUMPSIntCast(nnz,nnz_mumps);CHKERRQ(ierr); 222 PetscFunctionReturn(0); 223 } 224 225 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps) 226 { 227 PetscErrorCode ierr; 228 229 PetscFunctionBegin; 230 ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr); 231 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 232 ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr); 233 mumps->id.size_schur = 0; 234 mumps->id.schur_lld = 0; 235 mumps->id.ICNTL(19) = 0; 236 PetscFunctionReturn(0); 237 } 238 239 /* solve with rhs in mumps->id.redrhs and return in the same location */ 240 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 241 { 242 Mat_MUMPS *mumps=(Mat_MUMPS*)F->data; 243 Mat S,B,X; 244 MatFactorSchurStatus schurstatus; 245 PetscInt sizesol; 246 PetscErrorCode ierr; 247 248 PetscFunctionBegin; 249 ierr = MatFactorFactorizeSchurComplement(F);CHKERRQ(ierr); 250 ierr = MatFactorGetSchurComplement(F,&S,&schurstatus);CHKERRQ(ierr); 251 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);CHKERRQ(ierr); 252 ierr = MatSetType(B,((PetscObject)S)->type_name);CHKERRQ(ierr); 253 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 254 ierr = MatBindToCPU(B,S->boundtocpu);CHKERRQ(ierr); 255 #endif 256 switch (schurstatus) { 257 case MAT_FACTOR_SCHUR_FACTORED: 258 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);CHKERRQ(ierr); 259 ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr); 260 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 261 ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr); 262 #endif 263 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 264 ierr = MatMatSolveTranspose(S,B,X);CHKERRQ(ierr); 265 } else { 266 ierr = MatMatSolve(S,B,X);CHKERRQ(ierr); 267 } 268 break; 269 case MAT_FACTOR_SCHUR_INVERTED: 270 sizesol = mumps->id.nrhs*mumps->id.size_schur; 271 if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 272 ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr); 273 ierr = PetscMalloc1(sizesol,&mumps->schur_sol);CHKERRQ(ierr); 274 mumps->schur_sizesol = sizesol; 275 } 276 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);CHKERRQ(ierr); 277 ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr); 278 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 279 ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr); 280 #endif 281 ierr = MatProductCreateWithMat(S,B,NULL,X);CHKERRQ(ierr); 282 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 283 ierr = 
MatProductSetType(X,MATPRODUCT_AtB);CHKERRQ(ierr); 284 } else { 285 ierr = MatProductSetType(X,MATPRODUCT_AB);CHKERRQ(ierr); 286 } 287 ierr = MatProductSetFromOptions(X);CHKERRQ(ierr); 288 ierr = MatProductSymbolic(X);CHKERRQ(ierr); 289 ierr = MatProductNumeric(X);CHKERRQ(ierr); 290 291 ierr = MatCopy(X,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 292 break; 293 default: 294 SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %d",F->schur_status); 295 } 296 ierr = MatFactorRestoreSchurComplement(F,&S,schurstatus);CHKERRQ(ierr); 297 ierr = MatDestroy(&B);CHKERRQ(ierr); 298 ierr = MatDestroy(&X);CHKERRQ(ierr); 299 PetscFunctionReturn(0); 300 } 301 302 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 303 { 304 Mat_MUMPS *mumps=(Mat_MUMPS*)F->data; 305 PetscErrorCode ierr; 306 307 PetscFunctionBegin; 308 if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 309 PetscFunctionReturn(0); 310 } 311 if (!expansion) { /* prepare for the condensation step */ 312 PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur; 313 /* allocate MUMPS internal array to store reduced right-hand sides */ 314 if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 315 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 316 mumps->id.lredrhs = mumps->id.size_schur; 317 ierr = PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);CHKERRQ(ierr); 318 mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs; 319 } 320 mumps->id.ICNTL(26) = 1; /* condensation phase */ 321 } else { /* prepare for the expansion step */ 322 /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */ 323 ierr = MatMumpsSolveSchur_Private(F);CHKERRQ(ierr); 324 mumps->id.ICNTL(26) = 2; /* expansion phase */ 325 PetscMUMPS_c(mumps); 326 PetscCheckFalse(mumps->id.INFOG(1) < 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1)); 327 /* restore defaults */ 328 mumps->id.ICNTL(26) = -1; 329 /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 330 if (mumps->id.nrhs > 1) { 331 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 332 mumps->id.lredrhs = 0; 333 mumps->sizeredrhs = 0; 334 } 335 } 336 PetscFunctionReturn(0); 337 } 338 339 /* 340 MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz] 341 342 input: 343 A - matrix in aij,baij or sbaij format 344 shift - 0: C style output triple; 1: Fortran style output triple. 345 reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 346 MAT_REUSE_MATRIX: only the values in v array are updated 347 output: 348 nnz - dim of r, c, and v (number of local nonzero entries of A) 349 r, c, v - row and col index, matrix values (matrix triples) 350 351 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 352 freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 353 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 
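   As an illustration of the output format only (not tied to any particular caller below): with shift=1
   (Fortran-style indices), a 2x2 sequential AIJ matrix whose stored nonzeros are a11, a21 and a22 is
   converted to nnz=3 with r = {1,2,2}, c = {1,1,2}, v = {a11,a21,a22}, i.e. one (row, col, value) entry
   per stored nonzero of A.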
354 355 */ 356 357 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 358 { 359 const PetscScalar *av; 360 const PetscInt *ai,*aj,*ajj,M=A->rmap->n; 361 PetscInt64 nz,rnz,i,j,k; 362 PetscErrorCode ierr; 363 PetscMUMPSInt *row,*col; 364 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 365 366 PetscFunctionBegin; 367 ierr = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr); 368 mumps->val = (PetscScalar*)av; 369 if (reuse == MAT_INITIAL_MATRIX) { 370 nz = aa->nz; 371 ai = aa->i; 372 aj = aa->j; 373 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 374 for (i=k=0; i<M; i++) { 375 rnz = ai[i+1] - ai[i]; 376 ajj = aj + ai[i]; 377 for (j=0; j<rnz; j++) { 378 ierr = PetscMUMPSIntCast(i+shift,&row[k]);CHKERRQ(ierr); 379 ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[k]);CHKERRQ(ierr); 380 k++; 381 } 382 } 383 mumps->irn = row; 384 mumps->jcn = col; 385 mumps->nnz = nz; 386 } 387 ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr); 388 PetscFunctionReturn(0); 389 } 390 391 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 392 { 393 PetscErrorCode ierr; 394 PetscInt64 nz,i,j,k,r; 395 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 396 PetscMUMPSInt *row,*col; 397 398 PetscFunctionBegin; 399 mumps->val = a->val; 400 if (reuse == MAT_INITIAL_MATRIX) { 401 nz = a->sliidx[a->totalslices]; 402 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 403 for (i=k=0; i<a->totalslices; i++) { 404 for (j=a->sliidx[i],r=0; j<a->sliidx[i+1]; j++,r=((r+1)&0x07)) { 405 ierr = PetscMUMPSIntCast(8*i+r+shift,&row[k++]);CHKERRQ(ierr); 406 } 407 } 408 for (i=0;i<nz;i++) {ierr = PetscMUMPSIntCast(a->colidx[i]+shift,&col[i]);CHKERRQ(ierr);} 409 mumps->irn = row; 410 mumps->jcn = col; 411 mumps->nnz = nz; 412 } 413 PetscFunctionReturn(0); 414 } 415 416 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 417 { 418 Mat_SeqBAIJ *aa=(Mat_SeqBAIJ*)A->data; 419 const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2; 420 PetscInt64 M,nz,idx=0,rnz,i,j,k,m; 421 PetscInt bs; 422 PetscErrorCode ierr; 423 PetscMUMPSInt *row,*col; 424 425 PetscFunctionBegin; 426 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 427 M = A->rmap->N/bs; 428 mumps->val = aa->a; 429 if (reuse == MAT_INITIAL_MATRIX) { 430 ai = aa->i; aj = aa->j; 431 nz = bs2*aa->nz; 432 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 433 for (i=0; i<M; i++) { 434 ajj = aj + ai[i]; 435 rnz = ai[i+1] - ai[i]; 436 for (k=0; k<rnz; k++) { 437 for (j=0; j<bs; j++) { 438 for (m=0; m<bs; m++) { 439 ierr = PetscMUMPSIntCast(i*bs + m + shift,&row[idx]);CHKERRQ(ierr); 440 ierr = PetscMUMPSIntCast(bs*ajj[k] + j + shift,&col[idx]);CHKERRQ(ierr); 441 idx++; 442 } 443 } 444 } 445 } 446 mumps->irn = row; 447 mumps->jcn = col; 448 mumps->nnz = nz; 449 } 450 PetscFunctionReturn(0); 451 } 452 453 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 454 { 455 const PetscInt *ai, *aj,*ajj; 456 PetscInt bs; 457 PetscInt64 nz,rnz,i,j,k,m; 458 PetscErrorCode ierr; 459 PetscMUMPSInt *row,*col; 460 PetscScalar *val; 461 Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)A->data; 462 const PetscInt bs2=aa->bs2,mbs=aa->mbs; 463 #if defined(PETSC_USE_COMPLEX) 464 PetscBool hermitian; 465 #endif 466 467 PetscFunctionBegin; 468 #if defined(PETSC_USE_COMPLEX) 469 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 470 PetscCheckFalse(hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for 
Choleksy"); 471 #endif 472 ai = aa->i; 473 aj = aa->j; 474 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 475 if (reuse == MAT_INITIAL_MATRIX) { 476 nz = aa->nz; 477 ierr = PetscMalloc2(bs2*nz,&row,bs2*nz,&col);CHKERRQ(ierr); 478 if (bs>1) { 479 ierr = PetscMalloc1(bs2*nz,&mumps->val_alloc);CHKERRQ(ierr); 480 mumps->val = mumps->val_alloc; 481 } else { 482 mumps->val = aa->a; 483 } 484 mumps->irn = row; 485 mumps->jcn = col; 486 } else { 487 if (bs == 1) mumps->val = aa->a; 488 row = mumps->irn; 489 col = mumps->jcn; 490 } 491 val = mumps->val; 492 493 nz = 0; 494 if (bs>1) { 495 for (i=0; i<mbs; i++) { 496 rnz = ai[i+1] - ai[i]; 497 ajj = aj + ai[i]; 498 for (j=0; j<rnz; j++) { 499 for (k=0; k<bs; k++) { 500 for (m=0; m<bs; m++) { 501 if (ajj[j]>i || k>=m) { 502 if (reuse == MAT_INITIAL_MATRIX) { 503 ierr = PetscMUMPSIntCast(i*bs + m + shift,&row[nz]);CHKERRQ(ierr); 504 ierr = PetscMUMPSIntCast(ajj[j]*bs + k + shift,&col[nz]);CHKERRQ(ierr); 505 } 506 val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs]; 507 } 508 } 509 } 510 } 511 } 512 } else if (reuse == MAT_INITIAL_MATRIX) { 513 for (i=0; i<mbs; i++) { 514 rnz = ai[i+1] - ai[i]; 515 ajj = aj + ai[i]; 516 for (j=0; j<rnz; j++) { 517 ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr); 518 ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr); 519 nz++; 520 } 521 } 522 PetscCheckFalse(nz != aa->nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT,nz,aa->nz); 523 } 524 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 525 PetscFunctionReturn(0); 526 } 527 528 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 529 { 530 const PetscInt *ai,*aj,*ajj,*adiag,M=A->rmap->n; 531 PetscInt64 nz,rnz,i,j; 532 const PetscScalar *av,*v1; 533 PetscScalar *val; 534 PetscErrorCode ierr; 535 PetscMUMPSInt *row,*col; 536 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 537 PetscBool missing; 538 #if defined(PETSC_USE_COMPLEX) 539 PetscBool hermitian; 540 #endif 541 542 PetscFunctionBegin; 543 #if defined(PETSC_USE_COMPLEX) 544 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 545 PetscCheckFalse(hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 546 #endif 547 ierr = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr); 548 ai = aa->i; aj = aa->j; 549 adiag = aa->diag; 550 ierr = MatMissingDiagonal_SeqAIJ(A,&missing,NULL);CHKERRQ(ierr); 551 if (reuse == MAT_INITIAL_MATRIX) { 552 /* count nz in the upper triangular part of A */ 553 nz = 0; 554 if (missing) { 555 for (i=0; i<M; i++) { 556 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 557 for (j=ai[i];j<ai[i+1];j++) { 558 if (aj[j] < i) continue; 559 nz++; 560 } 561 } else { 562 nz += ai[i+1] - adiag[i]; 563 } 564 } 565 } else { 566 for (i=0; i<M; i++) nz += ai[i+1] - adiag[i]; 567 } 568 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 569 ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr); 570 mumps->nnz = nz; 571 mumps->irn = row; 572 mumps->jcn = col; 573 mumps->val = mumps->val_alloc = val; 574 575 nz = 0; 576 if (missing) { 577 for (i=0; i<M; i++) { 578 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 579 for (j=ai[i];j<ai[i+1];j++) { 580 if (aj[j] < i) continue; 581 ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr); 582 ierr = PetscMUMPSIntCast(aj[j]+shift,&col[nz]);CHKERRQ(ierr); 583 val[nz] = av[j]; 584 nz++; 585 } 586 } else { 587 rnz = ai[i+1] - adiag[i]; 588 ajj = aj + adiag[i]; 589 v1 = av + adiag[i]; 590 for (j=0; j<rnz; j++) { 591 
ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr); 592 ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr); 593 val[nz++] = v1[j]; 594 } 595 } 596 } 597 } else { 598 for (i=0; i<M; i++) { 599 rnz = ai[i+1] - adiag[i]; 600 ajj = aj + adiag[i]; 601 v1 = av + adiag[i]; 602 for (j=0; j<rnz; j++) { 603 ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr); 604 ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr); 605 val[nz++] = v1[j]; 606 } 607 } 608 } 609 } else { 610 nz = 0; 611 val = mumps->val; 612 if (missing) { 613 for (i=0; i <M; i++) { 614 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 615 for (j=ai[i];j<ai[i+1];j++) { 616 if (aj[j] < i) continue; 617 val[nz++] = av[j]; 618 } 619 } else { 620 rnz = ai[i+1] - adiag[i]; 621 v1 = av + adiag[i]; 622 for (j=0; j<rnz; j++) { 623 val[nz++] = v1[j]; 624 } 625 } 626 } 627 } else { 628 for (i=0; i <M; i++) { 629 rnz = ai[i+1] - adiag[i]; 630 v1 = av + adiag[i]; 631 for (j=0; j<rnz; j++) { 632 val[nz++] = v1[j]; 633 } 634 } 635 } 636 } 637 ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr); 638 PetscFunctionReturn(0); 639 } 640 641 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 642 { 643 PetscErrorCode ierr; 644 const PetscInt *ai,*aj,*bi,*bj,*garray,*ajj,*bjj; 645 PetscInt bs; 646 PetscInt64 rstart,nz,i,j,k,m,jj,irow,countA,countB; 647 PetscMUMPSInt *row,*col; 648 const PetscScalar *av,*bv,*v1,*v2; 649 PetscScalar *val; 650 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data; 651 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)(mat->A)->data; 652 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 653 const PetscInt bs2=aa->bs2,mbs=aa->mbs; 654 #if defined(PETSC_USE_COMPLEX) 655 PetscBool hermitian; 656 #endif 657 658 PetscFunctionBegin; 659 #if defined(PETSC_USE_COMPLEX) 660 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 661 PetscCheckFalse(hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 662 #endif 663 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 664 rstart = A->rmap->rstart; 665 ai = aa->i; 666 aj = aa->j; 667 bi = bb->i; 668 bj = bb->j; 669 av = aa->a; 670 bv = bb->a; 671 672 garray = mat->garray; 673 674 if (reuse == MAT_INITIAL_MATRIX) { 675 nz = (aa->nz+bb->nz)*bs2; /* just a conservative estimate */ 676 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 677 ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr); 678 /* can not decide the exact mumps->nnz now because of the SBAIJ */ 679 mumps->irn = row; 680 mumps->jcn = col; 681 mumps->val = mumps->val_alloc = val; 682 } else { 683 val = mumps->val; 684 } 685 686 jj = 0; irow = rstart; 687 for (i=0; i<mbs; i++) { 688 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 689 countA = ai[i+1] - ai[i]; 690 countB = bi[i+1] - bi[i]; 691 bjj = bj + bi[i]; 692 v1 = av + ai[i]*bs2; 693 v2 = bv + bi[i]*bs2; 694 695 if (bs>1) { 696 /* A-part */ 697 for (j=0; j<countA; j++) { 698 for (k=0; k<bs; k++) { 699 for (m=0; m<bs; m++) { 700 if (rstart + ajj[j]*bs>irow || k>=m) { 701 if (reuse == MAT_INITIAL_MATRIX) { 702 ierr = PetscMUMPSIntCast(irow + m + shift,&row[jj]);CHKERRQ(ierr); 703 ierr = PetscMUMPSIntCast(rstart + ajj[j]*bs + k + shift,&col[jj]);CHKERRQ(ierr); 704 } 705 val[jj++] = v1[j*bs2 + m + k*bs]; 706 } 707 } 708 } 709 } 710 711 /* B-part */ 712 for (j=0; j < countB; j++) { 713 for (k=0; k<bs; k++) { 714 for (m=0; m<bs; m++) { 715 if (reuse == MAT_INITIAL_MATRIX) { 716 ierr = PetscMUMPSIntCast(irow + m + shift,&row[jj]);CHKERRQ(ierr); 717 ierr 
= PetscMUMPSIntCast(garray[bjj[j]]*bs + k + shift,&col[jj]);CHKERRQ(ierr); 718 } 719 val[jj++] = v2[j*bs2 + m + k*bs]; 720 } 721 } 722 } 723 } else { 724 /* A-part */ 725 for (j=0; j<countA; j++) { 726 if (reuse == MAT_INITIAL_MATRIX) { 727 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 728 ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr); 729 } 730 val[jj++] = v1[j]; 731 } 732 733 /* B-part */ 734 for (j=0; j < countB; j++) { 735 if (reuse == MAT_INITIAL_MATRIX) { 736 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 737 ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr); 738 } 739 val[jj++] = v2[j]; 740 } 741 } 742 irow+=bs; 743 } 744 mumps->nnz = jj; 745 PetscFunctionReturn(0); 746 } 747 748 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 749 { 750 const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 751 PetscErrorCode ierr; 752 PetscInt64 rstart,nz,i,j,jj,irow,countA,countB; 753 PetscMUMPSInt *row,*col; 754 const PetscScalar *av, *bv,*v1,*v2; 755 PetscScalar *val; 756 Mat Ad,Ao; 757 Mat_SeqAIJ *aa; 758 Mat_SeqAIJ *bb; 759 760 PetscFunctionBegin; 761 ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr); 762 ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr); 763 ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr); 764 765 aa = (Mat_SeqAIJ*)(Ad)->data; 766 bb = (Mat_SeqAIJ*)(Ao)->data; 767 ai = aa->i; 768 aj = aa->j; 769 bi = bb->i; 770 bj = bb->j; 771 772 rstart = A->rmap->rstart; 773 774 if (reuse == MAT_INITIAL_MATRIX) { 775 nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */ 776 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 777 ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr); 778 mumps->nnz = nz; 779 mumps->irn = row; 780 mumps->jcn = col; 781 mumps->val = mumps->val_alloc = val; 782 } else { 783 val = mumps->val; 784 } 785 786 jj = 0; irow = rstart; 787 for (i=0; i<m; i++) { 788 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 789 countA = ai[i+1] - ai[i]; 790 countB = bi[i+1] - bi[i]; 791 bjj = bj + bi[i]; 792 v1 = av + ai[i]; 793 v2 = bv + bi[i]; 794 795 /* A-part */ 796 for (j=0; j<countA; j++) { 797 if (reuse == MAT_INITIAL_MATRIX) { 798 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 799 ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr); 800 } 801 val[jj++] = v1[j]; 802 } 803 804 /* B-part */ 805 for (j=0; j < countB; j++) { 806 if (reuse == MAT_INITIAL_MATRIX) { 807 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 808 ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr); 809 } 810 val[jj++] = v2[j]; 811 } 812 irow++; 813 } 814 ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr); 815 ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr); 816 PetscFunctionReturn(0); 817 } 818 819 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 820 { 821 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)A->data; 822 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ*)(mat->A)->data; 823 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 824 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj; 825 const PetscInt *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart; 826 const PetscInt bs2=mat->bs2; 827 PetscErrorCode ierr; 828 PetscInt bs; 829 PetscInt64 nz,i,j,k,n,jj,irow,countA,countB,idx; 830 PetscMUMPSInt *row,*col; 831 const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2; 832 PetscScalar *val; 833 
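  /* This routine expands every bs x bs block of the diagonal part (mat->A) and the off-diagonal part (mat->B)
     into bs2 individual (row, col, value) entries; global row indices start at rstart and off-diagonal column
     indices are recovered through garray[] */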
834 PetscFunctionBegin; 835 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 836 if (reuse == MAT_INITIAL_MATRIX) { 837 nz = bs2*(aa->nz + bb->nz); 838 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 839 ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr); 840 mumps->nnz = nz; 841 mumps->irn = row; 842 mumps->jcn = col; 843 mumps->val = mumps->val_alloc = val; 844 } else { 845 val = mumps->val; 846 } 847 848 jj = 0; irow = rstart; 849 for (i=0; i<mbs; i++) { 850 countA = ai[i+1] - ai[i]; 851 countB = bi[i+1] - bi[i]; 852 ajj = aj + ai[i]; 853 bjj = bj + bi[i]; 854 v1 = av + bs2*ai[i]; 855 v2 = bv + bs2*bi[i]; 856 857 idx = 0; 858 /* A-part */ 859 for (k=0; k<countA; k++) { 860 for (j=0; j<bs; j++) { 861 for (n=0; n<bs; n++) { 862 if (reuse == MAT_INITIAL_MATRIX) { 863 ierr = PetscMUMPSIntCast(irow + n + shift,&row[jj]);CHKERRQ(ierr); 864 ierr = PetscMUMPSIntCast(rstart + bs*ajj[k] + j + shift,&col[jj]);CHKERRQ(ierr); 865 } 866 val[jj++] = v1[idx++]; 867 } 868 } 869 } 870 871 idx = 0; 872 /* B-part */ 873 for (k=0; k<countB; k++) { 874 for (j=0; j<bs; j++) { 875 for (n=0; n<bs; n++) { 876 if (reuse == MAT_INITIAL_MATRIX) { 877 ierr = PetscMUMPSIntCast(irow + n + shift,&row[jj]);CHKERRQ(ierr); 878 ierr = PetscMUMPSIntCast(bs*garray[bjj[k]] + j + shift,&col[jj]);CHKERRQ(ierr); 879 } 880 val[jj++] = v2[idx++]; 881 } 882 } 883 } 884 irow += bs; 885 } 886 PetscFunctionReturn(0); 887 } 888 889 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 890 { 891 const PetscInt *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 892 PetscErrorCode ierr; 893 PetscInt64 rstart,nz,nza,nzb,i,j,jj,irow,countA,countB; 894 PetscMUMPSInt *row,*col; 895 const PetscScalar *av, *bv,*v1,*v2; 896 PetscScalar *val; 897 Mat Ad,Ao; 898 Mat_SeqAIJ *aa; 899 Mat_SeqAIJ *bb; 900 #if defined(PETSC_USE_COMPLEX) 901 PetscBool hermitian; 902 #endif 903 904 PetscFunctionBegin; 905 #if defined(PETSC_USE_COMPLEX) 906 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 907 PetscCheckFalse(hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 908 #endif 909 ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr); 910 ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr); 911 ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr); 912 913 aa = (Mat_SeqAIJ*)(Ad)->data; 914 bb = (Mat_SeqAIJ*)(Ao)->data; 915 ai = aa->i; 916 aj = aa->j; 917 adiag = aa->diag; 918 bi = bb->i; 919 bj = bb->j; 920 921 rstart = A->rmap->rstart; 922 923 if (reuse == MAT_INITIAL_MATRIX) { 924 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 925 nzb = 0; /* num of upper triangular entries in mat->B */ 926 for (i=0; i<m; i++) { 927 nza += (ai[i+1] - adiag[i]); 928 countB = bi[i+1] - bi[i]; 929 bjj = bj + bi[i]; 930 for (j=0; j<countB; j++) { 931 if (garray[bjj[j]] > rstart) nzb++; 932 } 933 } 934 935 nz = nza + nzb; /* total nz of upper triangular part of mat */ 936 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 937 ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr); 938 mumps->nnz = nz; 939 mumps->irn = row; 940 mumps->jcn = col; 941 mumps->val = mumps->val_alloc = val; 942 } else { 943 val = mumps->val; 944 } 945 946 jj = 0; irow = rstart; 947 for (i=0; i<m; i++) { 948 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 949 v1 = av + adiag[i]; 950 countA = ai[i+1] - adiag[i]; 951 countB = bi[i+1] - bi[i]; 952 bjj = bj + bi[i]; 953 v2 = bv + bi[i]; 954 955 /* A-part */ 956 for (j=0; 
j<countA; j++) { 957 if (reuse == MAT_INITIAL_MATRIX) { 958 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 959 ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr); 960 } 961 val[jj++] = v1[j]; 962 } 963 964 /* B-part */ 965 for (j=0; j < countB; j++) { 966 if (garray[bjj[j]] > rstart) { 967 if (reuse == MAT_INITIAL_MATRIX) { 968 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 969 ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr); 970 } 971 val[jj++] = v2[j]; 972 } 973 } 974 irow++; 975 } 976 ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr); 977 ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr); 978 PetscFunctionReturn(0); 979 } 980 981 PetscErrorCode MatDestroy_MUMPS(Mat A) 982 { 983 PetscErrorCode ierr; 984 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 985 986 PetscFunctionBegin; 987 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 988 ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr); 989 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 990 ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr); 991 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 992 ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr); 993 ierr = PetscFree2(mumps->irn,mumps->jcn);CHKERRQ(ierr); 994 ierr = PetscFree(mumps->val_alloc);CHKERRQ(ierr); 995 ierr = PetscFree(mumps->info);CHKERRQ(ierr); 996 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 997 mumps->id.job = JOB_END; 998 PetscMUMPS_c(mumps); 999 PetscCheckFalse(mumps->id.INFOG(1) < 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d",mumps->id.INFOG(1)); 1000 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1001 if (mumps->use_petsc_omp_support) { 1002 ierr = PetscOmpCtrlDestroy(&mumps->omp_ctrl);CHKERRQ(ierr); 1003 ierr = PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);CHKERRQ(ierr); 1004 ierr = PetscFree3(mumps->rhs_nrow,mumps->rhs_recvcounts,mumps->rhs_disps);CHKERRQ(ierr); 1005 } 1006 #endif 1007 ierr = PetscFree(mumps->ia_alloc);CHKERRQ(ierr); 1008 ierr = PetscFree(mumps->ja_alloc);CHKERRQ(ierr); 1009 ierr = PetscFree(mumps->recvcount);CHKERRQ(ierr); 1010 ierr = PetscFree(mumps->reqs);CHKERRQ(ierr); 1011 ierr = PetscFree(mumps->irhs_loc);CHKERRQ(ierr); 1012 if (mumps->mumps_comm != MPI_COMM_NULL) {ierr = PetscCommRestoreComm(PetscObjectComm((PetscObject)A),&mumps->mumps_comm);CHKERRQ(ierr);} 1013 ierr = PetscFree(A->data);CHKERRQ(ierr); 1014 1015 /* clear composed functions */ 1016 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr); 1017 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr); 1018 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr); 1019 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr); 1020 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);CHKERRQ(ierr); 1021 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr); 1022 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);CHKERRQ(ierr); 1023 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);CHKERRQ(ierr); 1024 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);CHKERRQ(ierr); 1025 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);CHKERRQ(ierr); 1026 ierr = 
PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);CHKERRQ(ierr); 1027 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);CHKERRQ(ierr); 1028 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);CHKERRQ(ierr); 1029 PetscFunctionReturn(0); 1030 } 1031 1032 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */ 1033 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A,PetscInt nrhs,const PetscScalar *array) 1034 { 1035 PetscErrorCode ierr; 1036 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1037 const PetscMPIInt ompsize=mumps->omp_comm_size; 1038 PetscInt i,m,M,rstart; 1039 1040 PetscFunctionBegin; 1041 ierr = MatGetSize(A,&M,NULL);CHKERRQ(ierr); 1042 ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr); 1043 PetscCheckFalse(M > PETSC_MUMPS_INT_MAX,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt"); 1044 if (ompsize == 1) { 1045 if (!mumps->irhs_loc) { 1046 mumps->nloc_rhs = m; 1047 ierr = PetscMalloc1(m,&mumps->irhs_loc);CHKERRQ(ierr); 1048 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 1049 for (i=0; i<m; i++) mumps->irhs_loc[i] = rstart+i+1; /* use 1-based indices */ 1050 } 1051 mumps->id.rhs_loc = (MumpsScalar*)array; 1052 } else { 1053 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1054 const PetscInt *ranges; 1055 PetscMPIInt j,k,sendcount,*petsc_ranks,*omp_ranks; 1056 MPI_Group petsc_group,omp_group; 1057 PetscScalar *recvbuf=NULL; 1058 1059 if (mumps->is_omp_master) { 1060 /* Lazily initialize the omp stuff for distributed rhs */ 1061 if (!mumps->irhs_loc) { 1062 ierr = PetscMalloc2(ompsize,&omp_ranks,ompsize,&petsc_ranks);CHKERRQ(ierr); 1063 ierr = PetscMalloc3(ompsize,&mumps->rhs_nrow,ompsize,&mumps->rhs_recvcounts,ompsize,&mumps->rhs_disps);CHKERRQ(ierr); 1064 ierr = MPI_Comm_group(mumps->petsc_comm,&petsc_group);CHKERRMPI(ierr); 1065 ierr = MPI_Comm_group(mumps->omp_comm,&omp_group);CHKERRMPI(ierr); 1066 for (j=0; j<ompsize; j++) omp_ranks[j] = j; 1067 ierr = MPI_Group_translate_ranks(omp_group,ompsize,omp_ranks,petsc_group,petsc_ranks);CHKERRMPI(ierr); 1068 1069 /* Populate mumps->irhs_loc[], rhs_nrow[] */ 1070 mumps->nloc_rhs = 0; 1071 ierr = MatGetOwnershipRanges(A,&ranges);CHKERRQ(ierr); 1072 for (j=0; j<ompsize; j++) { 1073 mumps->rhs_nrow[j] = ranges[petsc_ranks[j]+1] - ranges[petsc_ranks[j]]; 1074 mumps->nloc_rhs += mumps->rhs_nrow[j]; 1075 } 1076 ierr = PetscMalloc1(mumps->nloc_rhs,&mumps->irhs_loc);CHKERRQ(ierr); 1077 for (j=k=0; j<ompsize; j++) { 1078 for (i=ranges[petsc_ranks[j]]; i<ranges[petsc_ranks[j]+1]; i++,k++) mumps->irhs_loc[k] = i+1; /* uses 1-based indices */ 1079 } 1080 1081 ierr = PetscFree2(omp_ranks,petsc_ranks);CHKERRQ(ierr); 1082 ierr = MPI_Group_free(&petsc_group);CHKERRMPI(ierr); 1083 ierr = MPI_Group_free(&omp_group);CHKERRMPI(ierr); 1084 } 1085 1086 /* Realloc buffers when current nrhs is bigger than what we have met */ 1087 if (nrhs > mumps->max_nrhs) { 1088 ierr = PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);CHKERRQ(ierr); 1089 ierr = PetscMalloc2(mumps->nloc_rhs*nrhs,&mumps->rhs_loc,mumps->nloc_rhs*nrhs,&mumps->rhs_recvbuf);CHKERRQ(ierr); 1090 mumps->max_nrhs = nrhs; 1091 } 1092 1093 /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */ 1094 for (j=0; j<ompsize; j++) {ierr = PetscMPIIntCast(mumps->rhs_nrow[j]*nrhs,&mumps->rhs_recvcounts[j]);CHKERRQ(ierr);} 1095 mumps->rhs_disps[0] = 0; 1096 for (j=1; j<ompsize; j++) { 1097 
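        /* the displacement of omp rank j's rows in recvbuf is the running sum of the recvcounts of the lower ranks */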
mumps->rhs_disps[j] = mumps->rhs_disps[j-1] + mumps->rhs_recvcounts[j-1]; 1098 PetscCheckFalse(mumps->rhs_disps[j] < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscMPIInt overflow!"); 1099 } 1100 recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */ 1101 } 1102 1103 ierr = PetscMPIIntCast(m*nrhs,&sendcount);CHKERRQ(ierr); 1104 ierr = MPI_Gatherv(array,sendcount,MPIU_SCALAR,recvbuf,mumps->rhs_recvcounts,mumps->rhs_disps,MPIU_SCALAR,0,mumps->omp_comm);CHKERRMPI(ierr); 1105 1106 if (mumps->is_omp_master) { 1107 if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */ 1108 PetscScalar *dst,*dstbase = mumps->rhs_loc; 1109 for (j=0; j<ompsize; j++) { 1110 const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j]; 1111 dst = dstbase; 1112 for (i=0; i<nrhs; i++) { 1113 ierr = PetscArraycpy(dst,src,mumps->rhs_nrow[j]);CHKERRQ(ierr); 1114 src += mumps->rhs_nrow[j]; 1115 dst += mumps->nloc_rhs; 1116 } 1117 dstbase += mumps->rhs_nrow[j]; 1118 } 1119 } 1120 mumps->id.rhs_loc = (MumpsScalar*)mumps->rhs_loc; 1121 } 1122 #endif /* PETSC_HAVE_OPENMP_SUPPORT */ 1123 } 1124 mumps->id.nrhs = nrhs; 1125 mumps->id.nloc_rhs = mumps->nloc_rhs; 1126 mumps->id.lrhs_loc = mumps->nloc_rhs; 1127 mumps->id.irhs_loc = mumps->irhs_loc; 1128 PetscFunctionReturn(0); 1129 } 1130 1131 PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x) 1132 { 1133 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1134 const PetscScalar *rarray = NULL; 1135 PetscScalar *array; 1136 IS is_iden,is_petsc; 1137 PetscErrorCode ierr; 1138 PetscInt i; 1139 PetscBool second_solve = PETSC_FALSE; 1140 static PetscBool cite1 = PETSC_FALSE,cite2 = PETSC_FALSE; 1141 1142 PetscFunctionBegin; 1143 ierr = PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1);CHKERRQ(ierr); 1144 ierr = PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. 
Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",&cite2);CHKERRQ(ierr); 1145 1146 if (A->factorerrortype) { 1147 ierr = PetscInfo(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1148 ierr = VecSetInf(x);CHKERRQ(ierr); 1149 PetscFunctionReturn(0); 1150 } 1151 1152 mumps->id.nrhs = 1; 1153 if (mumps->petsc_size > 1) { 1154 if (mumps->ICNTL20 == 10) { 1155 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1156 ierr = VecGetArrayRead(b,&rarray);CHKERRQ(ierr); 1157 ierr = MatMumpsSetUpDistRHSInfo(A,1,rarray);CHKERRQ(ierr); 1158 } else { 1159 mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/ 1160 ierr = VecScatterBegin(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1161 ierr = VecScatterEnd(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1162 if (!mumps->myid) { 1163 ierr = VecGetArray(mumps->b_seq,&array);CHKERRQ(ierr); 1164 mumps->id.rhs = (MumpsScalar*)array; 1165 } 1166 } 1167 } else { /* petsc_size == 1 */ 1168 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1169 ierr = VecCopy(b,x);CHKERRQ(ierr); 1170 ierr = VecGetArray(x,&array);CHKERRQ(ierr); 1171 mumps->id.rhs = (MumpsScalar*)array; 1172 } 1173 1174 /* 1175 handle condensation step of Schur complement (if any) 1176 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 1177 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 1178 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 
1179 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 1180 */ 1181 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1182 PetscCheckFalse(mumps->petsc_size > 1,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc"); 1183 second_solve = PETSC_TRUE; 1184 ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr); 1185 } 1186 /* solve phase */ 1187 /*-------------*/ 1188 mumps->id.job = JOB_SOLVE; 1189 PetscMUMPS_c(mumps); 1190 PetscCheckFalse(mumps->id.INFOG(1) < 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1)); 1191 1192 /* handle expansion step of Schur complement (if any) */ 1193 if (second_solve) { 1194 ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr); 1195 } 1196 1197 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 1198 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 1199 /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 1200 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 1201 } 1202 if (!mumps->scat_sol) { /* create scatter scat_sol */ 1203 PetscInt *isol2_loc=NULL; 1204 ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */ 1205 ierr = PetscMalloc1(mumps->id.lsol_loc,&isol2_loc);CHKERRQ(ierr); 1206 for (i=0; i<mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i]-1; /* change Fortran style to C style */ 1207 ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,isol2_loc,PETSC_OWN_POINTER,&is_petsc);CHKERRQ(ierr); /* to */ 1208 ierr = VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr); 1209 ierr = ISDestroy(&is_iden);CHKERRQ(ierr); 1210 ierr = ISDestroy(&is_petsc);CHKERRQ(ierr); 1211 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 1212 } 1213 1214 ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1215 ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1216 } 1217 1218 if (mumps->petsc_size > 1) { 1219 if (mumps->ICNTL20 == 10) { 1220 ierr = VecRestoreArrayRead(b,&rarray);CHKERRQ(ierr); 1221 } else if (!mumps->myid) { 1222 ierr = VecRestoreArray(mumps->b_seq,&array);CHKERRQ(ierr); 1223 } 1224 } else {ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);} 1225 1226 ierr = PetscLogFlops(2.0*mumps->id.RINFO(3));CHKERRQ(ierr); 1227 PetscFunctionReturn(0); 1228 } 1229 1230 PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x) 1231 { 1232 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1233 PetscErrorCode ierr; 1234 1235 PetscFunctionBegin; 1236 mumps->id.ICNTL(9) = 0; 1237 ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr); 1238 mumps->id.ICNTL(9) = 1; 1239 PetscFunctionReturn(0); 1240 } 1241 1242 PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X) 1243 { 1244 PetscErrorCode ierr; 1245 Mat Bt = NULL; 1246 PetscBool denseX,denseB,flg,flgT; 1247 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1248 PetscInt i,nrhs,M; 1249 PetscScalar *array; 1250 const PetscScalar *rbray; 1251 PetscInt lsol_loc,nlsol_loc,*idxx,iidx = 0; 1252 PetscMUMPSInt *isol_loc,*isol_loc_save; 1253 PetscScalar *bray,*sol_loc,*sol_loc_save; 1254 IS is_to,is_from; 1255 PetscInt k,proc,j,m,myrstart; 1256 const PetscInt *rstart; 1257 Vec v_mpi,msol_loc; 1258 VecScatter scat_sol; 1259 Vec b_seq; 1260 VecScatter scat_rhs; 1261 
PetscScalar       *aa;
  PetscInt          spnr,*ia,*ja;
  Mat_MPIAIJ        *b = NULL;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompareAny((PetscObject)X,&denseX,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  PetscCheckFalse(!denseX,PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");

  ierr = PetscObjectTypeCompareAny((PetscObject)B,&denseB,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (denseB) {
    PetscCheckFalse(B->rmap->n != X->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution");
    mumps->id.ICNTL(20) = 0; /* dense RHS */
  } else { /* sparse B */
    PetscCheckFalse(X == B,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_IDN,"X and B must be different matrices");
    ierr = PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);CHKERRQ(ierr);
    if (flgT) { /* input B is the transpose of the actual RHS matrix,
                   because MUMPS requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
      ierr = MatTransposeGetMat(B,&Bt);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix");
    mumps->id.ICNTL(20) = 1; /* sparse RHS */
  }

  ierr = MatGetSize(B,&M,&nrhs);CHKERRQ(ierr);
  mumps->id.nrhs = nrhs;
  mumps->id.lrhs = M;
  mumps->id.rhs  = NULL;

  if (mumps->petsc_size == 1) {
    PetscScalar *aa;
    PetscInt    spnr,*ia,*ja;
    PetscBool   second_solve = PETSC_FALSE;

    ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
    mumps->id.rhs = (MumpsScalar*)array;

    if (denseB) {
      /* copy B to X */
      ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr);
      ierr = PetscArraycpy(array,rbray,M*nrhs);CHKERRQ(ierr);
      ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr);
    } else { /* sparse B */
      ierr = MatSeqAIJGetArray(Bt,&aa);CHKERRQ(ierr);
      ierr = MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      PetscCheckFalse(!flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr);
      mumps->id.rhs_sparse = (MumpsScalar*)aa;
    }
    /* handle condensation step of Schur complement (if any) */
    if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
      second_solve = PETSC_TRUE;
      ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
    }
    /* solve phase */
    /*-------------*/
    mumps->id.job = JOB_SOLVE;
    PetscMUMPS_c(mumps);
    PetscCheckFalse(mumps->id.INFOG(1) < 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));

    /* handle expansion step of Schur complement (if any) */
    if (second_solve) {
      ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
    }
    if (!denseB) { /* sparse B */
      ierr = MatSeqAIJRestoreArray(Bt,&aa);CHKERRQ(ierr);
      ierr = MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      PetscCheckFalse(!flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
    ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /*--------- parallel case: MUMPS requires rhs B to be centralized on the host!
--------*/ 1333 PetscCheckFalse(mumps->petsc_size > 1 && mumps->id.ICNTL(19),PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc"); 1334 1335 /* create msol_loc to hold mumps local solution */ 1336 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1337 sol_loc_save = (PetscScalar*)mumps->id.sol_loc; 1338 1339 lsol_loc = mumps->id.lsol_loc; 1340 nlsol_loc = nrhs*lsol_loc; /* length of sol_loc */ 1341 ierr = PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);CHKERRQ(ierr); 1342 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1343 mumps->id.isol_loc = isol_loc; 1344 1345 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);CHKERRQ(ierr); 1346 1347 if (denseB) { 1348 if (mumps->ICNTL20 == 10) { 1349 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1350 ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr); 1351 ierr = MatMumpsSetUpDistRHSInfo(A,nrhs,rbray);CHKERRQ(ierr); 1352 ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr); 1353 ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr); 1354 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,NULL,&v_mpi);CHKERRQ(ierr); 1355 } else { 1356 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1357 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1358 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1359 0, re-arrange B into desired order, which is a local operation. 1360 */ 1361 1362 /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 1363 /* wrap dense rhs matrix B into a vector v_mpi */ 1364 ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr); 1365 ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr); 1366 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr); 1367 ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr); 1368 1369 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! 
*/ 1370 if (!mumps->myid) { 1371 PetscInt *idx; 1372 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1373 ierr = PetscMalloc1(nrhs*M,&idx);CHKERRQ(ierr); 1374 ierr = MatGetOwnershipRanges(B,&rstart);CHKERRQ(ierr); 1375 k = 0; 1376 for (proc=0; proc<mumps->petsc_size; proc++) { 1377 for (j=0; j<nrhs; j++) { 1378 for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i; 1379 } 1380 } 1381 1382 ierr = VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);CHKERRQ(ierr); 1383 ierr = ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr); 1384 ierr = ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);CHKERRQ(ierr); 1385 } else { 1386 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);CHKERRQ(ierr); 1387 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);CHKERRQ(ierr); 1388 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);CHKERRQ(ierr); 1389 } 1390 ierr = VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);CHKERRQ(ierr); 1391 ierr = VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1392 ierr = ISDestroy(&is_to);CHKERRQ(ierr); 1393 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1394 ierr = VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1395 1396 if (!mumps->myid) { /* define rhs on the host */ 1397 ierr = VecGetArray(b_seq,&bray);CHKERRQ(ierr); 1398 mumps->id.rhs = (MumpsScalar*)bray; 1399 ierr = VecRestoreArray(b_seq,&bray);CHKERRQ(ierr); 1400 } 1401 } 1402 } else { /* sparse B */ 1403 b = (Mat_MPIAIJ*)Bt->data; 1404 1405 /* wrap dense X into a vector v_mpi */ 1406 ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr); 1407 ierr = MatDenseGetArray(X,&bray);CHKERRQ(ierr); 1408 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr); 1409 ierr = MatDenseRestoreArray(X,&bray);CHKERRQ(ierr); 1410 1411 if (!mumps->myid) { 1412 ierr = MatSeqAIJGetArray(b->A,&aa);CHKERRQ(ierr); 1413 ierr = MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1414 PetscCheckFalse(!flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 1415 ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr); 1416 mumps->id.rhs_sparse = (MumpsScalar*)aa; 1417 } else { 1418 mumps->id.irhs_ptr = NULL; 1419 mumps->id.irhs_sparse = NULL; 1420 mumps->id.nz_rhs = 0; 1421 mumps->id.rhs_sparse = NULL; 1422 } 1423 } 1424 1425 /* solve phase */ 1426 /*-------------*/ 1427 mumps->id.job = JOB_SOLVE; 1428 PetscMUMPS_c(mumps); 1429 PetscCheckFalse(mumps->id.INFOG(1) < 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1)); 1430 1431 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1432 ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr); 1433 ierr = VecPlaceArray(v_mpi,array);CHKERRQ(ierr); 1434 1435 /* create scatter scat_sol */ 1436 ierr = MatGetOwnershipRanges(X,&rstart);CHKERRQ(ierr); 1437 /* iidx: index for scatter mumps solution to petsc X */ 1438 1439 ierr = ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);CHKERRQ(ierr); 1440 ierr = PetscMalloc1(nlsol_loc,&idxx);CHKERRQ(ierr); 1441 for (i=0; i<lsol_loc; i++) { 1442 isol_loc[i] -= 1; /* change Fortran style to C style. 
isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1443 1444 for (proc=0; proc<mumps->petsc_size; proc++) { 1445 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) { 1446 myrstart = rstart[proc]; 1447 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1448 iidx = k + myrstart*nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1449 m = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */ 1450 break; 1451 } 1452 } 1453 1454 for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m; 1455 } 1456 ierr = ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr); 1457 ierr = VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);CHKERRQ(ierr); 1458 ierr = VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1459 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1460 ierr = ISDestroy(&is_to);CHKERRQ(ierr); 1461 ierr = VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1462 ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr); 1463 1464 /* free spaces */ 1465 mumps->id.sol_loc = (MumpsScalar*)sol_loc_save; 1466 mumps->id.isol_loc = isol_loc_save; 1467 1468 ierr = PetscFree2(sol_loc,isol_loc);CHKERRQ(ierr); 1469 ierr = PetscFree(idxx);CHKERRQ(ierr); 1470 ierr = VecDestroy(&msol_loc);CHKERRQ(ierr); 1471 ierr = VecDestroy(&v_mpi);CHKERRQ(ierr); 1472 if (!denseB) { 1473 if (!mumps->myid) { 1474 b = (Mat_MPIAIJ*)Bt->data; 1475 ierr = MatSeqAIJRestoreArray(b->A,&aa);CHKERRQ(ierr); 1476 ierr = MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1477 PetscCheckFalse(!flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 1478 } 1479 } else { 1480 if (mumps->ICNTL20 == 0) { 1481 ierr = VecDestroy(&b_seq);CHKERRQ(ierr); 1482 ierr = VecScatterDestroy(&scat_rhs);CHKERRQ(ierr); 1483 } 1484 } 1485 ierr = VecScatterDestroy(&scat_sol);CHKERRQ(ierr); 1486 ierr = PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));CHKERRQ(ierr); 1487 PetscFunctionReturn(0); 1488 } 1489 1490 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X) 1491 { 1492 PetscErrorCode ierr; 1493 PetscBool flg; 1494 Mat B; 1495 1496 PetscFunctionBegin; 1497 ierr = PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 1498 PetscCheckFalse(!flg,PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix"); 1499 1500 /* Create B=Bt^T that uses Bt's data structure */ 1501 ierr = MatCreateTranspose(Bt,&B);CHKERRQ(ierr); 1502 1503 ierr = MatMatSolve_MUMPS(A,B,X);CHKERRQ(ierr); 1504 ierr = MatDestroy(&B);CHKERRQ(ierr); 1505 PetscFunctionReturn(0); 1506 } 1507 1508 #if !defined(PETSC_USE_COMPLEX) 1509 /* 1510 input: 1511 F: numeric factor 1512 output: 1513 nneg: total number of negative pivots 1514 nzero: total number of zero pivots 1515 npos: (global dimension of F) - nneg - nzero 1516 */ 1517 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos) 1518 { 1519 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1520 PetscErrorCode ierr; 1521 PetscMPIInt size; 1522 1523 PetscFunctionBegin; 1524 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRMPI(ierr); 1525 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. 
Set ICNTL(13)=1 to skip ScaLAPACK */ 1526 PetscCheckFalse(size > 1 && mumps->id.ICNTL(13) != 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia",mumps->id.INFOG(13)); 1527 1528 if (nneg) *nneg = mumps->id.INFOG(12); 1529 if (nzero || npos) { 1530 PetscCheckFalse(mumps->id.ICNTL(24) != 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1531 if (nzero) *nzero = mumps->id.INFOG(28); 1532 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1533 } 1534 PetscFunctionReturn(0); 1535 } 1536 #endif 1537 1538 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps) 1539 { 1540 PetscErrorCode ierr; 1541 PetscInt i,nreqs; 1542 PetscMUMPSInt *irn,*jcn; 1543 PetscMPIInt count; 1544 PetscInt64 totnnz,remain; 1545 const PetscInt osize=mumps->omp_comm_size; 1546 PetscScalar *val; 1547 1548 PetscFunctionBegin; 1549 if (osize > 1) { 1550 if (reuse == MAT_INITIAL_MATRIX) { 1551 /* master first gathers counts of nonzeros to receive */ 1552 if (mumps->is_omp_master) {ierr = PetscMalloc1(osize,&mumps->recvcount);CHKERRQ(ierr);} 1553 ierr = MPI_Gather(&mumps->nnz,1,MPIU_INT64,mumps->recvcount,1,MPIU_INT64,0/*master*/,mumps->omp_comm);CHKERRMPI(ierr); 1554 1555 /* Then each computes number of send/recvs */ 1556 if (mumps->is_omp_master) { 1557 /* Start from 1 since self communication is not done in MPI */ 1558 nreqs = 0; 1559 for (i=1; i<osize; i++) nreqs += (mumps->recvcount[i]+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX; 1560 } else { 1561 nreqs = (mumps->nnz+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX; 1562 } 1563 ierr = PetscMalloc1(nreqs*3,&mumps->reqs);CHKERRQ(ierr); /* Triple the requests since we send irn, jcn and val seperately */ 1564 1565 /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 1566 MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 1567 might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size 1568 is very small, the current approach should have no extra overhead compared to MPI_Gatherv. 
1569 */ 1570 nreqs = 0; /* counter for actual send/recvs */ 1571 if (mumps->is_omp_master) { 1572 for (i=0,totnnz=0; i<osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1573 ierr = PetscMalloc2(totnnz,&irn,totnnz,&jcn);CHKERRQ(ierr); 1574 ierr = PetscMalloc1(totnnz,&val);CHKERRQ(ierr); 1575 1576 /* Self communication */ 1577 ierr = PetscArraycpy(irn,mumps->irn,mumps->nnz);CHKERRQ(ierr); 1578 ierr = PetscArraycpy(jcn,mumps->jcn,mumps->nnz);CHKERRQ(ierr); 1579 ierr = PetscArraycpy(val,mumps->val,mumps->nnz);CHKERRQ(ierr); 1580 1581 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1582 ierr = PetscFree2(mumps->irn,mumps->jcn);CHKERRQ(ierr); 1583 ierr = PetscFree(mumps->val_alloc);CHKERRQ(ierr); 1584 mumps->nnz = totnnz; 1585 mumps->irn = irn; 1586 mumps->jcn = jcn; 1587 mumps->val = mumps->val_alloc = val; 1588 1589 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1590 jcn += mumps->recvcount[0]; 1591 val += mumps->recvcount[0]; 1592 1593 /* Remote communication */ 1594 for (i=1; i<osize; i++) { 1595 count = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX); 1596 remain = mumps->recvcount[i] - count; 1597 while (count>0) { 1598 ierr = MPI_Irecv(irn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr); 1599 ierr = MPI_Irecv(jcn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr); 1600 ierr = MPI_Irecv(val,count,MPIU_SCALAR, i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr); 1601 irn += count; 1602 jcn += count; 1603 val += count; 1604 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1605 remain -= count; 1606 } 1607 } 1608 } else { 1609 irn = mumps->irn; 1610 jcn = mumps->jcn; 1611 val = mumps->val; 1612 count = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX); 1613 remain = mumps->nnz - count; 1614 while (count>0) { 1615 ierr = MPI_Isend(irn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr); 1616 ierr = MPI_Isend(jcn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr); 1617 ierr = MPI_Isend(val,count,MPIU_SCALAR, 0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr); 1618 irn += count; 1619 jcn += count; 1620 val += count; 1621 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1622 remain -= count; 1623 } 1624 } 1625 } else { 1626 nreqs = 0; 1627 if (mumps->is_omp_master) { 1628 val = mumps->val + mumps->recvcount[0]; 1629 for (i=1; i<osize; i++) { /* Remote communication only since self data is already in place */ 1630 count = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX); 1631 remain = mumps->recvcount[i] - count; 1632 while (count>0) { 1633 ierr = MPI_Irecv(val,count,MPIU_SCALAR,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr); 1634 val += count; 1635 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1636 remain -= count; 1637 } 1638 } 1639 } else { 1640 val = mumps->val; 1641 count = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX); 1642 remain = mumps->nnz - count; 1643 while (count>0) { 1644 ierr = MPI_Isend(val,count,MPIU_SCALAR,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr); 1645 val += count; 1646 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1647 remain -= count; 1648 } 1649 } 1650 } 1651 ierr = MPI_Waitall(nreqs,mumps->reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 1652 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1653 } 1654 PetscFunctionReturn(0); 1655 } 1656 1657 PetscErrorCode 
MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat_MUMPS      *mumps =(Mat_MUMPS*)(F)->data;
  PetscErrorCode ierr;
  PetscBool      isMPIAIJ;

  PetscFunctionBegin;
  /* skip numeric factorization if the analysis phase failed, except for the empty-matrix case (INFOG(1)=-16 with INFO(2)=0) */
  if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) {
    if (mumps->id.INFOG(1) == -6) {
      ierr = PetscInfo(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    }
    ierr = PetscInfo(A,"MatFactorNumeric is called after the analysis phase failed, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps);CHKERRQ(ierr);
  ierr = MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);CHKERRQ(ierr);

  /* numerical factorization phase */
  /*-------------------------------*/
  mumps->id.job = JOB_FACTNUMERIC;
  if (!mumps->id.ICNTL(18)) { /* A is centralized */
    if (!mumps->myid) {
      mumps->id.a = (MumpsScalar*)mumps->val;
    }
  } else {
    mumps->id.a_loc = (MumpsScalar*)mumps->val;
  }
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) {
    if (A->erroriffailure) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d",mumps->id.INFOG(1),mumps->id.INFO(2));
    } else {
      if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
        ierr = PetscInfo(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
      } else if (mumps->id.INFOG(1) == -13) {
        ierr = PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_OUTMEMORY;
      } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
        ierr = PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with work array\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_OUTMEMORY;
      } else {
        ierr = PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_OTHER;
      }
    }
  }
  PetscCheckFalse(!mumps->myid && mumps->id.ICNTL(16) > 0,PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d",mumps->id.INFOG(16));

  F->assembled = PETSC_TRUE;

  if (F->schur) { /* reset Schur status to unfactored */
#if defined(PETSC_HAVE_CUDA)
    F->schur->offloadmask = PETSC_OFFLOAD_CPU;
#endif
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
      mumps->id.ICNTL(19) = 2;
      ierr = MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);CHKERRQ(ierr);
    }
    ierr = MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);CHKERRQ(ierr);
  }

  /* just to be sure that the ICNTL(19) value returned by a call to MatMumpsGetIcntl is always consistent */
  if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;

  if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
  if (mumps->petsc_size > 1) {
    PetscInt     lsol_loc;
    PetscScalar *sol_loc;

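    /* With ICNTL(21)=1 (set in PetscInitializeMUMPS for the parallel case) the solution stays distributed in MUMPS:
       this rank receives id.INFO(23) entries whose 1-based global indices are returned in id.isol_loc.
       The sequential Vec x_seq created below wraps that buffer so MatSolve_MUMPS can scatter it back to PETSc's layout. */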
ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr); 1729 1730 /* distributed solution; Create x_seq=sol_loc for repeated use */ 1731 if (mumps->x_seq) { 1732 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 1733 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 1734 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 1735 } 1736 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1737 ierr = PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);CHKERRQ(ierr); 1738 mumps->id.lsol_loc = lsol_loc; 1739 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1740 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr); 1741 } 1742 ierr = PetscLogFlops(mumps->id.RINFO(2));CHKERRQ(ierr); 1743 PetscFunctionReturn(0); 1744 } 1745 1746 /* Sets MUMPS options from the options database */ 1747 PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A) 1748 { 1749 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1750 PetscErrorCode ierr; 1751 PetscMUMPSInt icntl=0; 1752 PetscInt info[80],i,ninfo=80; 1753 PetscBool flg=PETSC_FALSE; 1754 1755 PetscFunctionBegin; 1756 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 1757 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr); 1758 if (flg) mumps->id.ICNTL(1) = icntl; 1759 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr); 1760 if (flg) mumps->id.ICNTL(2) = icntl; 1761 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr); 1762 if (flg) mumps->id.ICNTL(3) = icntl; 1763 1764 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr); 1765 if (flg) mumps->id.ICNTL(4) = icntl; 1766 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 1767 1768 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr); 1769 if (flg) mumps->id.ICNTL(6) = icntl; 1770 1771 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis. 
0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr); 1772 if (flg) { 1773 PetscCheckFalse(icntl == 1 || icntl < 0 || icntl > 7,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 1774 mumps->id.ICNTL(7) = icntl; 1775 } 1776 1777 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr); 1778 /* ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL);CHKERRQ(ierr); handled by MatSolveTranspose_MUMPS() */ 1779 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr); 1780 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr); 1781 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr); 1782 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr); 1783 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr); 1784 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr); 1785 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 1786 ierr = MatDestroy(&F->schur);CHKERRQ(ierr); 1787 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 1788 } 1789 1790 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 1791 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 1792 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 1793 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 1794 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 1795 In short, we could not use distributed RHS with MPICH until v4.0b1. 1796 */ 1797 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101)) 1798 mumps->ICNTL20 = 0; /* Centralized dense RHS*/ 1799 #else 1800 mumps->ICNTL20 = 10; /* Distributed dense RHS*/ 1801 #endif 1802 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_20","ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides","None",mumps->ICNTL20,&mumps->ICNTL20,&flg);CHKERRQ(ierr); 1803 PetscCheckFalse(flg && mumps->ICNTL20 != 10 && mumps->ICNTL20 != 0,PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. 
Allowed values are 0, 10",(int)mumps->ICNTL20); 1804 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0) 1805 PetscCheckFalse(flg && mumps->ICNTL20 == 10,PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=10 is not supported before MUMPS-5.3.0"); 1806 #endif 1807 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL);CHKERRQ(ierr); we only use distributed solution vector */ 1808 1809 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr); 1810 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr); 1811 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr); 1812 if (mumps->id.ICNTL(24)) { 1813 mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ 1814 } 1815 1816 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr); 1817 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr); 1818 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_27","ICNTL(27): controls the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr); 1819 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr); 1820 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr); 1821 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */ 1822 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr); 1823 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr); -- not supported by PETSc API */ 1824 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr); 1825 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr); 1826 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);CHKERRQ(ierr); 1827 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with 
BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);CHKERRQ(ierr); 1828 1829 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr); 1830 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr); 1831 ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr); 1832 ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr); 1833 ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr); 1834 ierr = PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);CHKERRQ(ierr); 1835 1836 ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL);CHKERRQ(ierr); 1837 1838 ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr); 1839 if (ninfo) { 1840 PetscCheckFalse(ninfo > 80,PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %" PetscInt_FMT " must <= 80",ninfo); 1841 ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr); 1842 mumps->ninfo = ninfo; 1843 for (i=0; i<ninfo; i++) { 1844 PetscCheckFalse(info[i] < 0 || info[i]>80,PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %" PetscInt_FMT " must between 1 and 80",ninfo); 1845 else mumps->info[i] = info[i]; 1846 } 1847 } 1848 1849 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1850 PetscFunctionReturn(0); 1851 } 1852 1853 PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps) 1854 { 1855 PetscErrorCode ierr; 1856 PetscInt nthreads=0; 1857 1858 PetscFunctionBegin; 1859 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1860 ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRMPI(ierr); 1861 ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRMPI(ierr);/* "if (!myid)" still works even if mumps_comm is different */ 1862 1863 ierr = PetscOptionsHasName(NULL,((PetscObject)A)->prefix,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr); 1864 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1865 ierr = PetscOptionsGetInt(NULL,((PetscObject)A)->prefix,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr); 1866 if (mumps->use_petsc_omp_support) { 1867 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1868 ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr); 1869 ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr); 1870 #else 1871 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. 
Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",((PetscObject)A)->prefix?((PetscObject)A)->prefix:"");
#endif
  } else {
    mumps->omp_comm      = PETSC_COMM_SELF;
    mumps->mumps_comm    = mumps->petsc_comm;
    mumps->is_omp_master = PETSC_TRUE;
  }
  ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRMPI(ierr);
  mumps->reqs = NULL;
  mumps->tag  = 0;

  /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
  if (mumps->mumps_comm != MPI_COMM_NULL) {
    ierr = PetscCommGetComm(PetscObjectComm((PetscObject)A),&mumps->mumps_comm);CHKERRQ(ierr);
  }

  mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
  mumps->id.job = JOB_INIT;
  mumps->id.par = 1;  /* host participates in factorization and solve */
  mumps->id.sym = mumps->sym;

  PetscMUMPS_c(mumps);
  PetscCheckFalse(mumps->id.INFOG(1) < 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in PetscInitializeMUMPS: INFOG(1)=%d",mumps->id.INFOG(1));

  /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
     For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
   */
  ierr = MPI_Bcast(mumps->id.icntl,40,MPI_INT, 0,mumps->omp_comm);CHKERRMPI(ierr);
  ierr = MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);CHKERRMPI(ierr);

  mumps->scat_rhs = NULL;
  mumps->scat_sol = NULL;

  /* set PETSc-MUMPS default options - override MUMPS default */
  mumps->id.ICNTL(3) = 0;
  mumps->id.ICNTL(4) = 0;
  if (mumps->petsc_size == 1) {
    mumps->id.ICNTL(18) = 0;  /* centralized assembled matrix input */
    mumps->id.ICNTL(7)  = 7;  /* automatic choice of ordering done by the package */
  } else {
    mumps->id.ICNTL(18) = 3;  /* distributed assembled matrix input */
    mumps->id.ICNTL(21) = 1;  /* distributed solution */
  }

  /* schur */
  mumps->id.size_schur    = 0;
  mumps->id.listvar_schur = NULL;
  mumps->id.schur         = NULL;
  mumps->sizeredrhs       = 0;
  mumps->schur_sol        = NULL;
  mumps->schur_sizesol    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (mumps->id.INFOG(1) < 0) {
    if (A->erroriffailure) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d",mumps->id.INFOG(1));
    } else {
      if (mumps->id.INFOG(1) == -6) {
        ierr = PetscInfo(F,"matrix is structurally singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
      } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
        ierr = PetscInfo(F,"workspace problem, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_OUTMEMORY;
      } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) { /* empty matrix (n = 0) */
        ierr = PetscInfo(F,"Empty matrix\n");CHKERRQ(ierr);
      } else {
        ierr = PetscInfo(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_OTHER;
      }
    }
  }
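  /* Reaching this point means the analysis either succeeded or failed non-fatally (A->erroriffailure is false);
     in the latter case the error has been recorded in F->factorerrortype and can be queried with MatFactorGetError(). */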
PetscFunctionReturn(0); 1949 } 1950 1951 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1952 { 1953 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1954 PetscErrorCode ierr; 1955 Vec b; 1956 const PetscInt M = A->rmap->N; 1957 1958 PetscFunctionBegin; 1959 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 1960 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 1961 PetscFunctionReturn(0); 1962 } 1963 1964 /* Set MUMPS options from the options database */ 1965 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1966 1967 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr); 1968 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1969 1970 /* analysis phase */ 1971 /*----------------*/ 1972 mumps->id.job = JOB_FACTSYMBOLIC; 1973 mumps->id.n = M; 1974 switch (mumps->id.ICNTL(18)) { 1975 case 0: /* centralized assembled matrix input */ 1976 if (!mumps->myid) { 1977 mumps->id.nnz = mumps->nnz; 1978 mumps->id.irn = mumps->irn; 1979 mumps->id.jcn = mumps->jcn; 1980 if (mumps->id.ICNTL(6)>1) mumps->id.a = (MumpsScalar*)mumps->val; 1981 if (r) { 1982 mumps->id.ICNTL(7) = 1; 1983 if (!mumps->myid) { 1984 const PetscInt *idx; 1985 PetscInt i; 1986 1987 ierr = PetscMalloc1(M,&mumps->id.perm_in);CHKERRQ(ierr); 1988 ierr = ISGetIndices(r,&idx);CHKERRQ(ierr); 1989 for (i=0; i<M; i++) {ierr = PetscMUMPSIntCast(idx[i]+1,&(mumps->id.perm_in[i]));CHKERRQ(ierr);} /* perm_in[]: start from 1, not 0! */ 1990 ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr); 1991 } 1992 } 1993 } 1994 break; 1995 case 3: /* distributed assembled matrix input (size>1) */ 1996 mumps->id.nnz_loc = mumps->nnz; 1997 mumps->id.irn_loc = mumps->irn; 1998 mumps->id.jcn_loc = mumps->jcn; 1999 if (mumps->id.ICNTL(6)>1) mumps->id.a_loc = (MumpsScalar*)mumps->val; 2000 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2001 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 2002 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 2003 ierr = VecDestroy(&b);CHKERRQ(ierr); 2004 } 2005 break; 2006 } 2007 PetscMUMPS_c(mumps); 2008 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 2009 2010 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2011 F->ops->solve = MatSolve_MUMPS; 2012 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2013 F->ops->matsolve = MatMatSolve_MUMPS; 2014 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2015 2016 mumps->matstruc = SAME_NONZERO_PATTERN; 2017 PetscFunctionReturn(0); 2018 } 2019 2020 /* Note the Petsc r and c permutations are ignored */ 2021 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 2022 { 2023 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 2024 PetscErrorCode ierr; 2025 Vec b; 2026 const PetscInt M = A->rmap->N; 2027 2028 PetscFunctionBegin; 2029 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2030 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2031 PetscFunctionReturn(0); 2032 } 2033 2034 /* Set MUMPS options from the options database */ 2035 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 2036 2037 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr); 2038 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 2039 2040 /* analysis phase */ 2041 /*----------------*/ 2042 mumps->id.job = JOB_FACTSYMBOLIC; 2043 mumps->id.n = M; 2044 switch (mumps->id.ICNTL(18)) { 2045 case 0: /* centralized assembled matrix input */ 2046 if (!mumps->myid) { 2047 mumps->id.nnz = mumps->nnz; 2048 mumps->id.irn = mumps->irn; 2049 mumps->id.jcn = mumps->jcn; 2050 if (mumps->id.ICNTL(6)>1) { 2051 mumps->id.a = (MumpsScalar*)mumps->val; 2052 } 2053 } 2054 break; 2055 case 3: /* distributed assembled matrix input (size>1) */ 2056 mumps->id.nnz_loc = mumps->nnz; 2057 mumps->id.irn_loc = mumps->irn; 2058 mumps->id.jcn_loc = mumps->jcn; 2059 if (mumps->id.ICNTL(6)>1) { 2060 mumps->id.a_loc = (MumpsScalar*)mumps->val; 2061 } 2062 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2063 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 2064 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 2065 ierr = VecDestroy(&b);CHKERRQ(ierr); 2066 } 2067 break; 2068 } 2069 PetscMUMPS_c(mumps); 2070 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 2071 2072 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2073 F->ops->solve = MatSolve_MUMPS; 2074 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2075 2076 mumps->matstruc = SAME_NONZERO_PATTERN; 2077 PetscFunctionReturn(0); 2078 } 2079 2080 /* Note the Petsc r permutation and factor info are ignored */ 2081 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info) 2082 { 2083 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 2084 PetscErrorCode ierr; 2085 Vec b; 2086 const PetscInt M = A->rmap->N; 2087 2088 PetscFunctionBegin; 2089 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2090 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2091 PetscFunctionReturn(0); 2092 } 2093 2094 /* Set MUMPS options from the options database */ 2095 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 2096 2097 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr); 2098 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 2099 2100 /* analysis phase */ 2101 /*----------------*/ 2102 mumps->id.job = JOB_FACTSYMBOLIC; 2103 mumps->id.n = M; 2104 switch (mumps->id.ICNTL(18)) { 2105 case 0: /* centralized assembled matrix input */ 2106 if (!mumps->myid) { 2107 mumps->id.nnz = mumps->nnz; 2108 mumps->id.irn = mumps->irn; 2109 mumps->id.jcn = mumps->jcn; 2110 if (mumps->id.ICNTL(6)>1) { 2111 mumps->id.a = (MumpsScalar*)mumps->val; 2112 } 2113 } 2114 break; 2115 case 3: /* distributed assembled matrix input (size>1) */ 2116 mumps->id.nnz_loc = mumps->nnz; 2117 mumps->id.irn_loc = mumps->irn; 2118 mumps->id.jcn_loc = mumps->jcn; 2119 if (mumps->id.ICNTL(6)>1) { 2120 mumps->id.a_loc = (MumpsScalar*)mumps->val; 2121 } 2122 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2123 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 2124 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 2125 ierr = VecDestroy(&b);CHKERRQ(ierr); 2126 } 2127 break; 2128 } 2129 PetscMUMPS_c(mumps); 2130 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 2131 2132 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2133 F->ops->solve = MatSolve_MUMPS; 2134 F->ops->solvetranspose = MatSolve_MUMPS; 2135 F->ops->matsolve = MatMatSolve_MUMPS; 2136 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2137 #if defined(PETSC_USE_COMPLEX) 2138 F->ops->getinertia = NULL; 2139 #else 2140 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2141 #endif 2142 2143 mumps->matstruc = SAME_NONZERO_PATTERN; 2144 PetscFunctionReturn(0); 2145 } 2146 2147 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) 2148 { 2149 PetscErrorCode ierr; 2150 PetscBool iascii; 2151 PetscViewerFormat format; 2152 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 2153 2154 PetscFunctionBegin; 2155 /* check if matrix is mumps type */ 2156 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0); 2157 2158 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 2159 if (iascii) { 2160 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 2161 if (format == PETSC_VIEWER_ASCII_INFO) { 2162 ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr); 2163 ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d\n",mumps->id.sym);CHKERRQ(ierr); 2164 ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d\n",mumps->id.par);CHKERRQ(ierr); 2165 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d\n",mumps->id.ICNTL(1));CHKERRQ(ierr); 2166 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d\n",mumps->id.ICNTL(2));CHKERRQ(ierr); 2167 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d\n",mumps->id.ICNTL(3));CHKERRQ(ierr); 2168 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d\n",mumps->id.ICNTL(4));CHKERRQ(ierr); 2169 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d\n",mumps->id.ICNTL(5));CHKERRQ(ierr); 2170 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d\n",mumps->id.ICNTL(6));CHKERRQ(ierr); 2171 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d\n",mumps->id.ICNTL(7));CHKERRQ(ierr); 2172 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d\n",mumps->id.ICNTL(8));CHKERRQ(ierr); 2173 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d\n",mumps->id.ICNTL(10));CHKERRQ(ierr); 2174 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d\n",mumps->id.ICNTL(11));CHKERRQ(ierr); 2175 if (mumps->id.ICNTL(11)>0) { 2176 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr); 2177 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr); 2178 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6));CHKERRQ(ierr); 2179 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr); 2180 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr); 2181 ierr = 
PetscViewerASCIIPrintf(viewer," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr); 2182 } 2183 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d\n",mumps->id.ICNTL(12));CHKERRQ(ierr); 2184 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (sequential factorization of the root node): %d\n",mumps->id.ICNTL(13));CHKERRQ(ierr); 2185 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d\n",mumps->id.ICNTL(14));CHKERRQ(ierr); 2186 /* ICNTL(15-17) not used */ 2187 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d\n",mumps->id.ICNTL(18));CHKERRQ(ierr); 2188 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d\n",mumps->id.ICNTL(19));CHKERRQ(ierr); 2189 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(20) (RHS sparse pattern): %d\n",mumps->id.ICNTL(20));CHKERRQ(ierr); 2190 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d\n",mumps->id.ICNTL(21));CHKERRQ(ierr); 2191 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d\n",mumps->id.ICNTL(22));CHKERRQ(ierr); 2192 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory can be allocated locally):%d\n",mumps->id.ICNTL(23));CHKERRQ(ierr); 2193 2194 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d\n",mumps->id.ICNTL(24));CHKERRQ(ierr); 2195 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d\n",mumps->id.ICNTL(25));CHKERRQ(ierr); 2196 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for RHS or solution): %d\n",mumps->id.ICNTL(26));CHKERRQ(ierr); 2197 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(27) (blocking size for multiple RHS): %d\n",mumps->id.ICNTL(27));CHKERRQ(ierr); 2198 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d\n",mumps->id.ICNTL(28));CHKERRQ(ierr); 2199 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d\n",mumps->id.ICNTL(29));CHKERRQ(ierr); 2200 2201 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d\n",mumps->id.ICNTL(30));CHKERRQ(ierr); 2202 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(31) (factors is discarded in the solve phase): %d\n",mumps->id.ICNTL(31));CHKERRQ(ierr); 2203 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d\n",mumps->id.ICNTL(33));CHKERRQ(ierr); 2204 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d\n",mumps->id.ICNTL(35));CHKERRQ(ierr); 2205 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(36) (choice of BLR factorization variant): %d\n",mumps->id.ICNTL(36));CHKERRQ(ierr); 2206 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(38) (estimated compression rate of LU factors): %d\n",mumps->id.ICNTL(38));CHKERRQ(ierr); 2207 2208 ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1));CHKERRQ(ierr); 2209 ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr); 2210 ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3));CHKERRQ(ierr); 2211 ierr = PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4));CHKERRQ(ierr); 2212 ierr = PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5));CHKERRQ(ierr); 2213 ierr = 
PetscViewerASCIIPrintf(viewer," CNTL(7) (dropping parameter for BLR): %g \n",mumps->id.CNTL(7));CHKERRQ(ierr); 2214 2215 /* information local to each processor */ 2216 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr); 2217 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 2218 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr); 2219 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2220 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr); 2221 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr); 2222 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2223 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr); 2224 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr); 2225 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2226 2227 ierr = PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr); 2228 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr); 2229 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2230 2231 ierr = PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr); 2232 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr); 2233 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2234 2235 ierr = PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr); 2236 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr); 2237 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2238 2239 if (mumps->ninfo && mumps->ninfo <= 80) { 2240 PetscInt i; 2241 for (i=0; i<mumps->ninfo; i++) { 2242 ierr = PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "): \n",mumps->info[i]);CHKERRQ(ierr); 2243 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(mumps->info[i]));CHKERRQ(ierr); 2244 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2245 } 2246 } 2247 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 2248 2249 if (!mumps->myid) { /* information from the host */ 2250 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr); 2251 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr); 2252 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr); 2253 ierr = PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr); 2254 2255 ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(3));CHKERRQ(ierr); 2256 ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer 
workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(4));CHKERRQ(ierr); 2257 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d\n",mumps->id.INFOG(5));CHKERRQ(ierr); 2258 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d\n",mumps->id.INFOG(6));CHKERRQ(ierr); 2259 ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively used after analysis): %d\n",mumps->id.INFOG(7));CHKERRQ(ierr); 2260 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n",mumps->id.INFOG(8));CHKERRQ(ierr); 2261 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n",mumps->id.INFOG(9));CHKERRQ(ierr); 2262 ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d\n",mumps->id.INFOG(10));CHKERRQ(ierr); 2263 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d\n",mumps->id.INFOG(11));CHKERRQ(ierr); 2264 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d\n",mumps->id.INFOG(12));CHKERRQ(ierr); 2265 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d\n",mumps->id.INFOG(13));CHKERRQ(ierr); 2266 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d\n",mumps->id.INFOG(14));CHKERRQ(ierr); 2267 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d\n",mumps->id.INFOG(15));CHKERRQ(ierr); 2268 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n",mumps->id.INFOG(16));CHKERRQ(ierr); 2269 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n",mumps->id.INFOG(17));CHKERRQ(ierr); 2270 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n",mumps->id.INFOG(18));CHKERRQ(ierr); 2271 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n",mumps->id.INFOG(19));CHKERRQ(ierr); 2272 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d\n",mumps->id.INFOG(20));CHKERRQ(ierr); 2273 ierr = PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n",mumps->id.INFOG(21));CHKERRQ(ierr); 2274 ierr = PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n",mumps->id.INFOG(22));CHKERRQ(ierr); 2275 ierr = PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n",mumps->id.INFOG(23));CHKERRQ(ierr); 2276 ierr = PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n",mumps->id.INFOG(24));CHKERRQ(ierr); 2277 ierr = PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): 
%d\n",mumps->id.INFOG(25));CHKERRQ(ierr); 2278 ierr = PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));CHKERRQ(ierr); 2279 ierr = PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));CHKERRQ(ierr); 2280 ierr = PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));CHKERRQ(ierr); 2281 ierr = PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));CHKERRQ(ierr); 2282 ierr = PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));CHKERRQ(ierr); 2283 ierr = PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));CHKERRQ(ierr); 2284 ierr = PetscViewerASCIIPrintf(viewer," INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));CHKERRQ(ierr); 2285 ierr = PetscViewerASCIIPrintf(viewer," INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(36));CHKERRQ(ierr); 2286 ierr = PetscViewerASCIIPrintf(viewer," INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n",mumps->id.INFOG(37));CHKERRQ(ierr); 2287 ierr = PetscViewerASCIIPrintf(viewer," INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(38));CHKERRQ(ierr); 2288 ierr = PetscViewerASCIIPrintf(viewer," INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n",mumps->id.INFOG(39));CHKERRQ(ierr); 2289 } 2290 } 2291 } 2292 PetscFunctionReturn(0); 2293 } 2294 2295 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info) 2296 { 2297 Mat_MUMPS *mumps =(Mat_MUMPS*)A->data; 2298 2299 PetscFunctionBegin; 2300 info->block_size = 1.0; 2301 info->nz_allocated = mumps->id.INFOG(20); 2302 info->nz_used = mumps->id.INFOG(20); 2303 info->nz_unneeded = 0.0; 2304 info->assemblies = 0.0; 2305 info->mallocs = 0.0; 2306 info->memory = 0.0; 2307 info->fill_ratio_given = 0; 2308 info->fill_ratio_needed = 0; 2309 info->factor_mallocs = 0; 2310 PetscFunctionReturn(0); 2311 } 2312 2313 /* -------------------------------------------------------------------------------------------*/ 2314 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 2315 { 2316 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2317 const PetscScalar *arr; 2318 const PetscInt *idxs; 2319 PetscInt size,i; 2320 PetscErrorCode ierr; 2321 2322 PetscFunctionBegin; 2323 ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr); 2324 if (mumps->petsc_size > 1) { 2325 PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */ 2326 2327 ls = mumps->myid ? (size ? 
PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 2328 ierr = MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);CHKERRMPI(ierr); 2329 PetscCheckFalse(!gs,PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc"); 2330 } 2331 2332 /* Schur complement matrix */ 2333 ierr = MatDestroy(&F->schur);CHKERRQ(ierr); 2334 ierr = MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur);CHKERRQ(ierr); 2335 ierr = MatDenseGetArrayRead(F->schur,&arr);CHKERRQ(ierr); 2336 mumps->id.schur = (MumpsScalar*)arr; 2337 mumps->id.size_schur = size; 2338 mumps->id.schur_lld = size; 2339 ierr = MatDenseRestoreArrayRead(F->schur,&arr);CHKERRQ(ierr); 2340 if (mumps->sym == 1) { 2341 ierr = MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 2342 } 2343 2344 /* MUMPS expects Fortran style indices */ 2345 ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr); 2346 ierr = PetscMalloc1(size,&mumps->id.listvar_schur);CHKERRQ(ierr); 2347 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 2348 for (i=0; i<size; i++) {ierr = PetscMUMPSIntCast(idxs[i]+1,&(mumps->id.listvar_schur[i]));CHKERRQ(ierr);} 2349 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 2350 if (mumps->petsc_size > 1) { 2351 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2352 } else { 2353 if (F->factortype == MAT_FACTOR_LU) { 2354 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2355 } else { 2356 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2357 } 2358 } 2359 /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 2360 mumps->id.ICNTL(26) = -1; 2361 PetscFunctionReturn(0); 2362 } 2363 2364 /* -------------------------------------------------------------------------------------------*/ 2365 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S) 2366 { 2367 Mat St; 2368 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2369 PetscScalar *array; 2370 #if defined(PETSC_USE_COMPLEX) 2371 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 2372 #endif 2373 PetscErrorCode ierr; 2374 2375 PetscFunctionBegin; 2376 PetscCheckFalse(!mumps->id.ICNTL(19),PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! 
You should call MatFactorSetSchurIS to enable it"); 2377 ierr = MatCreate(PETSC_COMM_SELF,&St);CHKERRQ(ierr); 2378 ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);CHKERRQ(ierr); 2379 ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr); 2380 ierr = MatSetUp(St);CHKERRQ(ierr); 2381 ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr); 2382 if (!mumps->sym) { /* MUMPS always return a full matrix */ 2383 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2384 PetscInt i,j,N=mumps->id.size_schur; 2385 for (i=0;i<N;i++) { 2386 for (j=0;j<N;j++) { 2387 #if !defined(PETSC_USE_COMPLEX) 2388 PetscScalar val = mumps->id.schur[i*N+j]; 2389 #else 2390 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2391 #endif 2392 array[j*N+i] = val; 2393 } 2394 } 2395 } else { /* stored by columns */ 2396 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 2397 } 2398 } else { /* either full or lower-triangular (not packed) */ 2399 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2400 PetscInt i,j,N=mumps->id.size_schur; 2401 for (i=0;i<N;i++) { 2402 for (j=i;j<N;j++) { 2403 #if !defined(PETSC_USE_COMPLEX) 2404 PetscScalar val = mumps->id.schur[i*N+j]; 2405 #else 2406 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2407 #endif 2408 array[i*N+j] = val; 2409 array[j*N+i] = val; 2410 } 2411 } 2412 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2413 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 2414 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2415 PetscInt i,j,N=mumps->id.size_schur; 2416 for (i=0;i<N;i++) { 2417 for (j=0;j<i+1;j++) { 2418 #if !defined(PETSC_USE_COMPLEX) 2419 PetscScalar val = mumps->id.schur[i*N+j]; 2420 #else 2421 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2422 #endif 2423 array[i*N+j] = val; 2424 array[j*N+i] = val; 2425 } 2426 } 2427 } 2428 } 2429 ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr); 2430 *S = St; 2431 PetscFunctionReturn(0); 2432 } 2433 2434 /* -------------------------------------------------------------------------------------------*/ 2435 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival) 2436 { 2437 PetscErrorCode ierr; 2438 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2439 2440 PetscFunctionBegin; 2441 ierr = PetscMUMPSIntCast(ival,&mumps->id.ICNTL(icntl));CHKERRQ(ierr); 2442 PetscFunctionReturn(0); 2443 } 2444 2445 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival) 2446 { 2447 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2448 2449 PetscFunctionBegin; 2450 *ival = mumps->id.ICNTL(icntl); 2451 PetscFunctionReturn(0); 2452 } 2453 2454 /*@ 2455 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2456 2457 Logically Collective on Mat 2458 2459 Input Parameters: 2460 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2461 . icntl - index of MUMPS parameter array ICNTL() 2462 - ival - value of MUMPS ICNTL(icntl) 2463 2464 Options Database: 2465 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival 2466 2467 Level: beginner 2468 2469 References: 2470 . 
* - MUMPS Users' Guide 2471 2472 .seealso: MatGetFactor(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2473 @*/ 2474 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival) 2475 { 2476 PetscErrorCode ierr; 2477 2478 PetscFunctionBegin; 2479 PetscValidType(F,1); 2480 PetscCheckFalse(!F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2481 PetscValidLogicalCollectiveInt(F,icntl,2); 2482 PetscValidLogicalCollectiveInt(F,ival,3); 2483 ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr); 2484 PetscFunctionReturn(0); 2485 } 2486 2487 /*@ 2488 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2489 2490 Logically Collective on Mat 2491 2492 Input Parameters: 2493 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2494 - icntl - index of MUMPS parameter array ICNTL() 2495 2496 Output Parameter: 2497 . ival - value of MUMPS ICNTL(icntl) 2498 2499 Level: beginner 2500 2501 References: 2502 . * - MUMPS Users' Guide 2503 2504 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2505 @*/ 2506 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival) 2507 { 2508 PetscErrorCode ierr; 2509 2510 PetscFunctionBegin; 2511 PetscValidType(F,1); 2512 PetscCheckFalse(!F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2513 PetscValidLogicalCollectiveInt(F,icntl,2); 2514 PetscValidIntPointer(ival,3); 2515 ierr = PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2516 PetscFunctionReturn(0); 2517 } 2518 2519 /* -------------------------------------------------------------------------------------------*/ 2520 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val) 2521 { 2522 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2523 2524 PetscFunctionBegin; 2525 mumps->id.CNTL(icntl) = val; 2526 PetscFunctionReturn(0); 2527 } 2528 2529 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val) 2530 { 2531 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2532 2533 PetscFunctionBegin; 2534 *val = mumps->id.CNTL(icntl); 2535 PetscFunctionReturn(0); 2536 } 2537 2538 /*@ 2539 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2540 2541 Logically Collective on Mat 2542 2543 Input Parameters: 2544 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2545 . icntl - index of MUMPS parameter array CNTL() 2546 - val - value of MUMPS CNTL(icntl) 2547 2548 Options Database: 2549 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival 2550 2551 Level: beginner 2552 2553 References: 2554 . 
* - MUMPS Users' Guide 2555 2556 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2557 @*/ 2558 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val) 2559 { 2560 PetscErrorCode ierr; 2561 2562 PetscFunctionBegin; 2563 PetscValidType(F,1); 2564 PetscCheckFalse(!F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2565 PetscValidLogicalCollectiveInt(F,icntl,2); 2566 PetscValidLogicalCollectiveReal(F,val,3); 2567 ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr); 2568 PetscFunctionReturn(0); 2569 } 2570 2571 /*@ 2572 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2573 2574 Logically Collective on Mat 2575 2576 Input Parameters: 2577 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2578 - icntl - index of MUMPS parameter array CNTL() 2579 2580 Output Parameter: 2581 . val - value of MUMPS CNTL(icntl) 2582 2583 Level: beginner 2584 2585 References: 2586 . * - MUMPS Users' Guide 2587 2588 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2589 @*/ 2590 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val) 2591 { 2592 PetscErrorCode ierr; 2593 2594 PetscFunctionBegin; 2595 PetscValidType(F,1); 2596 PetscCheckFalse(!F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2597 PetscValidLogicalCollectiveInt(F,icntl,2); 2598 PetscValidRealPointer(val,3); 2599 ierr = PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2600 PetscFunctionReturn(0); 2601 } 2602 2603 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info) 2604 { 2605 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2606 2607 PetscFunctionBegin; 2608 *info = mumps->id.INFO(icntl); 2609 PetscFunctionReturn(0); 2610 } 2611 2612 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog) 2613 { 2614 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2615 2616 PetscFunctionBegin; 2617 *infog = mumps->id.INFOG(icntl); 2618 PetscFunctionReturn(0); 2619 } 2620 2621 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo) 2622 { 2623 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2624 2625 PetscFunctionBegin; 2626 *rinfo = mumps->id.RINFO(icntl); 2627 PetscFunctionReturn(0); 2628 } 2629 2630 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog) 2631 { 2632 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2633 2634 PetscFunctionBegin; 2635 *rinfog = mumps->id.RINFOG(icntl); 2636 PetscFunctionReturn(0); 2637 } 2638 2639 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS) 2640 { 2641 PetscErrorCode ierr; 2642 Mat Bt = NULL,Btseq = NULL; 2643 PetscBool flg; 2644 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2645 PetscScalar *aa; 2646 PetscInt spnr,*ia,*ja,M,nrhs; 2647 2648 PetscFunctionBegin; 2649 PetscValidPointer(spRHS,2); 2650 ierr = PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);CHKERRQ(ierr); 2651 if (flg) { 2652 ierr = MatTransposeGetMat(spRHS,&Bt);CHKERRQ(ierr); 2653 } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be type MATTRANSPOSEMAT matrix"); 2654 2655 ierr = MatMumpsSetIcntl(F,30,1);CHKERRQ(ierr); 2656 2657 if (mumps->petsc_size > 1) { 2658 Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data; 
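             /* In parallel, only the local diagonal block of the MPIAIJ matrix Bt is used below;
                the sparse right-hand side is handed to MUMPS on the host rank only (see the !mumps->myid branch further down) */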
2659 Btseq = b->A; 2660 } else { 2661 Btseq = Bt; 2662 } 2663 2664 ierr = MatGetSize(spRHS,&M,&nrhs);CHKERRQ(ierr); 2665 mumps->id.nrhs = nrhs; 2666 mumps->id.lrhs = M; 2667 mumps->id.rhs = NULL; 2668 2669 if (!mumps->myid) { 2670 ierr = MatSeqAIJGetArray(Btseq,&aa);CHKERRQ(ierr); 2671 ierr = MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2672 PetscCheckFalse(!flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2673 ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr); 2674 mumps->id.rhs_sparse = (MumpsScalar*)aa; 2675 } else { 2676 mumps->id.irhs_ptr = NULL; 2677 mumps->id.irhs_sparse = NULL; 2678 mumps->id.nz_rhs = 0; 2679 mumps->id.rhs_sparse = NULL; 2680 } 2681 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2682 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2683 2684 /* solve phase */ 2685 /*-------------*/ 2686 mumps->id.job = JOB_SOLVE; 2687 PetscMUMPS_c(mumps); 2688 if (mumps->id.INFOG(1) < 0) 2689 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d",mumps->id.INFOG(1),mumps->id.INFO(2)); 2690 2691 if (!mumps->myid) { 2692 ierr = MatSeqAIJRestoreArray(Btseq,&aa);CHKERRQ(ierr); 2693 ierr = MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2694 PetscCheckFalse(!flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2695 } 2696 PetscFunctionReturn(0); 2697 } 2698 2699 /*@ 2700 MatMumpsGetInverse - Get user-specified set of entries in inverse of A 2701 2702 Logically Collective on Mat 2703 2704 Input Parameters: 2705 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2706 - spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0] 2707 2708 Output Parameter: 2709 . spRHS - requested entries of inverse of A 2710 2711 Level: beginner 2712 2713 References: 2714 . * - MUMPS Users' Guide 2715 2716 .seealso: MatGetFactor(), MatCreateTranspose() 2717 @*/ 2718 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS) 2719 { 2720 PetscErrorCode ierr; 2721 2722 PetscFunctionBegin; 2723 PetscValidType(F,1); 2724 PetscCheckFalse(!F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2725 ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr); 2726 PetscFunctionReturn(0); 2727 } 2728 2729 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST) 2730 { 2731 PetscErrorCode ierr; 2732 Mat spRHS; 2733 2734 PetscFunctionBegin; 2735 ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr); 2736 ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr); 2737 ierr = MatDestroy(&spRHS);CHKERRQ(ierr); 2738 PetscFunctionReturn(0); 2739 } 2740 2741 /*@ 2742 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T 2743 2744 Logically Collective on Mat 2745 2746 Input Parameters: 2747 + F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface 2748 - spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0] 2749 2750 Output Parameter: 2751 . spRHST - requested entries of inverse of A^T 2752 2753 Level: beginner 2754 2755 References: 2756 . 
* - MUMPS Users' Guide 2757 2758 .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse() 2759 @*/ 2760 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST) 2761 { 2762 PetscErrorCode ierr; 2763 PetscBool flg; 2764 2765 PetscFunctionBegin; 2766 PetscValidType(F,1); 2767 PetscCheckFalse(!F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2768 ierr = PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 2769 PetscCheckFalse(!flg,PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix"); 2770 2771 ierr = PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));CHKERRQ(ierr); 2772 PetscFunctionReturn(0); 2773 } 2774 2775 /*@ 2776 MatMumpsGetInfo - Get MUMPS parameter INFO() 2777 2778 Logically Collective on Mat 2779 2780 Input Parameters: 2781 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2782 - icntl - index of MUMPS parameter array INFO() 2783 2784 Output Parameter: 2785 . ival - value of MUMPS INFO(icntl) 2786 2787 Level: beginner 2788 2789 References: 2790 . * - MUMPS Users' Guide 2791 2792 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2793 @*/ 2794 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival) 2795 { 2796 PetscErrorCode ierr; 2797 2798 PetscFunctionBegin; 2799 PetscValidType(F,1); 2800 PetscCheckFalse(!F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2801 PetscValidIntPointer(ival,3); 2802 ierr = PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2803 PetscFunctionReturn(0); 2804 } 2805 2806 /*@ 2807 MatMumpsGetInfog - Get MUMPS parameter INFOG() 2808 2809 Logically Collective on Mat 2810 2811 Input Parameters: 2812 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2813 - icntl - index of MUMPS parameter array INFOG() 2814 2815 Output Parameter: 2816 . ival - value of MUMPS INFOG(icntl) 2817 2818 Level: beginner 2819 2820 References: 2821 . * - MUMPS Users' Guide 2822 2823 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2824 @*/ 2825 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival) 2826 { 2827 PetscErrorCode ierr; 2828 2829 PetscFunctionBegin; 2830 PetscValidType(F,1); 2831 PetscCheckFalse(!F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2832 PetscValidIntPointer(ival,3); 2833 ierr = PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2834 PetscFunctionReturn(0); 2835 } 2836 2837 /*@ 2838 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 2839 2840 Logically Collective on Mat 2841 2842 Input Parameters: 2843 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2844 - icntl - index of MUMPS parameter array RINFO() 2845 2846 Output Parameter: 2847 . val - value of MUMPS RINFO(icntl) 2848 2849 Level: beginner 2850 2851 References: 2852 . 
* - MUMPS Users' Guide 2853 2854 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfog() 2855 @*/ 2856 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val) 2857 { 2858 PetscErrorCode ierr; 2859 2860 PetscFunctionBegin; 2861 PetscValidType(F,1); 2862 PetscCheckFalse(!F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2863 PetscValidRealPointer(val,3); 2864 ierr = PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2865 PetscFunctionReturn(0); 2866 } 2867 2868 /*@ 2869 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 2870 2871 Logically Collective on Mat 2872 2873 Input Parameters: 2874 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2875 - icntl - index of MUMPS parameter array RINFOG() 2876 2877 Output Parameter: 2878 . val - value of MUMPS RINFOG(icntl) 2879 2880 Level: beginner 2881 2882 References: 2883 . * - MUMPS Users' Guide 2884 2885 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo() 2886 @*/ 2887 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val) 2888 { 2889 PetscErrorCode ierr; 2890 2891 PetscFunctionBegin; 2892 PetscValidType(F,1); 2893 PetscCheckFalse(!F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2894 PetscValidRealPointer(val,3); 2895 ierr = PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2896 PetscFunctionReturn(0); 2897 } 2898 2899 /*MC 2900 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 2901 distributed and sequential matrices via the external package MUMPS. 2902 2903 Works with MATAIJ and MATSBAIJ matrices 2904 2905 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 2906 2907 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode while the non-MUMPS part of the code runs in flat-MPI mode. See details below. 2908 2909 Use -pc_type cholesky or lu together with -pc_factor_mat_solver_type mumps to use this direct solver 2910 2911 Options Database Keys: 2912 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 2913 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warnings 2914 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 2915 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 2916 . -mat_mumps_icntl_6 - ICNTL(6): permutes the matrix to a zero-free diagonal and/or scales it (0 to 7) 2917 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 2918 Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 2919 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 2920 . -mat_mumps_icntl_10 - ICNTL(10): maximum number of iterative refinements 2921 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 2922 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 2923 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 2924 . 
-mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 2925 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 2926 . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS 2927 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 2928 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can be allocated per processor 2929 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 2930 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 2931 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed 2932 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering 2933 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 2934 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 2935 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 2936 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 2937 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 2938 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 2939 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 2940 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 2941 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 2942 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 2943 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 2944 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 2945 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 2946 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) were called before calling MUMPS. 2947 If m is not given, the default is typically the number of cores per CPU package (socket) as reported by hwloc, as suggested by the MUMPS manual. 2948 2949 Level: beginner 2950 2951 Notes: 2952 MUMPS Cholesky does not handle (complex) Hermitian matrices (see the MUMPS Users' Guide, http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf), so requesting a Cholesky factorization of a Hermitian matrix produces an error. 2953 2954 When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling 2955 $ KSPGetPC(ksp,&pc); 2956 $ PCFactorGetMatrix(pc,&mat); 2957 $ MatMumpsGetInfo(mat,....); 2958 $ MatMumpsGetInfog(mat,....); etc. 2959 Alternatively, run with -ksp_error_if_not_converged; the program will then stop and print the MUMPS information in the error message. 2960 2961 Using MUMPS with 64-bit integers 2962 MUMPS provides 64-bit integer support in two build modes: 2963 full 64-bit: here MUMPS is built with the C preprocessing flag -DINTSIZE64 and the Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and 2964 requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS to be built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI). 2965 2966 selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In the compressed sparse row (CSR) storage of matrices, 2967 MUMPS stores column indices in 32-bit integers but row offsets in 64-bit integers, so you can have a huge number of non-zeros, but must have fewer than 2^31 rows and 2968 columns. 
This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This mode requires a regular (32-bit 2969 integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS. 2970 2971 With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc. 2972 2973 Two modes to run MUMPS/PETSc with OpenMP 2974 $ Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 2975 $ threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". 2976 2977 $ -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example, 2978 $ if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 2979 2980 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 2981 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc 2982 (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 2983 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 2984 (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided). 2985 2986 If you run your code through a job submission system, there are caveats regarding MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 2987 processes on each compute node. Listing the processes in rank ascending order, we split the processes on a node into consecutive groups of 2988 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and the others in the omp_comm 2989 are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slave ranks are not. We free the CPUs assigned to slaves (which might have been set 2990 by CPU binding policies in job scripts) and make them available to the master so that the OpenMP threads spawned by MUMPS can run on them. 2991 On a multi-socket compute node, MPI rank mapping is an issue. Continuing the example above, suppose your compute node has two sockets. 2992 If you interleave MPI ranks across the two sockets, in other words, even ranks are placed on socket 0 and odd ranks on socket 1, and bind 2993 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16 a master rank (and the threads it spawns) will use half of the cores in socket 0 and half of the 2994 cores in socket 1, which definitely hurts locality. On the other hand, if you map MPI ranks consecutively onto the two sockets, the 2995 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 2996 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 2997 examine the mapping result. 2998 2999 PETSc does not control thread binding in MUMPS. So to get the best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts, 3000 for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts, as PETSc 3001 calls omp_set_num_threads(m) internally before calling MUMPS. 
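   The sketch below is illustrative rather than taken from a PETSc example; it assumes a previously assembled matrix A, vectors b and x, a KSP ksp, and declarations PC pc; Mat F;. It shows one way to select this solver and tune a MUMPS option from application code before the solve:
$     KSPSetOperators(ksp,A,A);
$     KSPGetPC(ksp,&pc);
$     PCSetType(pc,PCLU);
$     PCFactorSetMatSolverType(pc,MATSOLVERMUMPS);
$     PCFactorSetUpMatSolverType(pc);   /* create the factor matrix so MUMPS options can be set on it */
$     PCFactorGetMatrix(pc,&F);
$     MatMumpsSetIcntl(F,14,30);        /* illustrative value: increase the estimated working space by 30 percent */
$     KSPSolve(ksp,b,x);
   The same effect can be obtained from the command line with -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_14 30.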
3002 3003 References: 3004 + * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 3005 - * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017. 3006 3007 .seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCFactorGetMatrix() 3008 3009 M*/ 3010 3011 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type) 3012 { 3013 PetscFunctionBegin; 3014 *type = MATSOLVERMUMPS; 3015 PetscFunctionReturn(0); 3016 } 3017 3018 /* MatGetFactor for Seq and MPI AIJ matrices */ 3019 static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F) 3020 { 3021 Mat B; 3022 PetscErrorCode ierr; 3023 Mat_MUMPS *mumps; 3024 PetscBool isSeqAIJ; 3025 PetscMPIInt size; 3026 3027 PetscFunctionBegin; 3028 #if defined(PETSC_USE_COMPLEX) 3029 PetscCheckFalse(A->hermitian && !A->symmetric && ftype == MAT_FACTOR_CHOLESKY,PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported"); 3030 #endif 3031 /* Create the factorization matrix */ 3032 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr); 3033 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3034 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3035 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 3036 ierr = MatSetUp(B);CHKERRQ(ierr); 3037 3038 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 3039 3040 B->ops->view = MatView_MUMPS; 3041 B->ops->getinfo = MatGetInfo_MUMPS; 3042 3043 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 3044 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 3045 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 3046 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 3047 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 3048 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 3049 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 3050 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 3051 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 3052 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 3053 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 3054 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 3055 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 3056 3057 if (ftype == MAT_FACTOR_LU) { 3058 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3059 
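      /* LU path: the AIJ matrix (sequential or parallel) is converted to the unsymmetric (i,j,v) triplet format MUMPS expects, and mumps->sym = 0 selects the unsymmetric MUMPS driver */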
B->factortype = MAT_FACTOR_LU; 3060 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3061 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 3062 ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]);CHKERRQ(ierr); 3063 mumps->sym = 0; 3064 } else { 3065 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3066 B->factortype = MAT_FACTOR_CHOLESKY; 3067 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3068 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 3069 ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY]);CHKERRQ(ierr); 3070 #if defined(PETSC_USE_COMPLEX) 3071 mumps->sym = 2; 3072 #else 3073 if (A->spd_set && A->spd) mumps->sym = 1; 3074 else mumps->sym = 2; 3075 #endif 3076 } 3077 3078 /* set solvertype */ 3079 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 3080 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 3081 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 3082 if (size == 1) { 3083 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3084 B->canuseordering = PETSC_TRUE; 3085 } 3086 B->ops->destroy = MatDestroy_MUMPS; 3087 B->data = (void*)mumps; 3088 3089 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 3090 3091 *F = B; 3092 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3093 PetscFunctionReturn(0); 3094 } 3095 3096 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3097 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F) 3098 { 3099 Mat B; 3100 PetscErrorCode ierr; 3101 Mat_MUMPS *mumps; 3102 PetscBool isSeqSBAIJ; 3103 PetscMPIInt size; 3104 3105 PetscFunctionBegin; 3106 #if defined(PETSC_USE_COMPLEX) 3107 PetscCheckFalse(A->hermitian && !A->symmetric,PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported"); 3108 #endif 3109 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3110 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3111 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 3112 ierr = MatSetUp(B);CHKERRQ(ierr); 3113 3114 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 3115 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr); 3116 if (isSeqSBAIJ) { 3117 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3118 } else { 3119 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3120 } 3121 3122 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3123 B->ops->view = MatView_MUMPS; 3124 B->ops->getinfo = MatGetInfo_MUMPS; 3125 3126 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 3127 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 3128 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 3129 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 3130 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 3131 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 3132 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 3133 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 3134 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 3135 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 3136 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 3137 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 3138 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 3139 3140 B->factortype = MAT_FACTOR_CHOLESKY; 3141 #if defined(PETSC_USE_COMPLEX) 3142 mumps->sym = 2; 3143 #else 3144 if (A->spd_set && A->spd) mumps->sym = 1; 3145 else mumps->sym = 2; 3146 #endif 3147 3148 /* set solvertype */ 3149 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 3150 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 3151 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 3152 if (size == 1) { 3153 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3154 B->canuseordering = PETSC_TRUE; 3155 } 3156 ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY]);CHKERRQ(ierr); 3157 B->ops->destroy = MatDestroy_MUMPS; 3158 B->data = (void*)mumps; 3159 3160 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 3161 3162 *F = B; 3163 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3164 PetscFunctionReturn(0); 3165 } 3166 3167 static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F) 3168 { 3169 Mat B; 3170 PetscErrorCode ierr; 3171 Mat_MUMPS *mumps; 3172 PetscBool isSeqBAIJ; 3173 PetscMPIInt size; 3174 3175 PetscFunctionBegin; 3176 /* Create the factorization matrix */ 3177 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr); 3178 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3179 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3180 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 3181 ierr = MatSetUp(B);CHKERRQ(ierr); 3182 3183 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 3184 if (ftype == MAT_FACTOR_LU) { 3185 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3186 B->factortype = MAT_FACTOR_LU; 3187 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3188 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3189 mumps->sym = 0; 3190 ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]);CHKERRQ(ierr); 3191 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 3192 3193 B->ops->view = MatView_MUMPS; 3194 B->ops->getinfo = MatGetInfo_MUMPS; 3195 3196 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 3197 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 3198 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 3199 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 3200 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 3201 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 3202 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 3203 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 3204 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 3205 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 3206 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 3207 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 3208 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 3209 3210 /* set solvertype */ 3211 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 3212 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 3213 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 3214 if (size == 1) { 3215 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3216 B->canuseordering = PETSC_TRUE; 3217 } 3218 B->ops->destroy = MatDestroy_MUMPS; 3219 B->data = (void*)mumps; 3220 3221 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 3222 3223 *F = B; 3224 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3225 PetscFunctionReturn(0); 3226 } 3227 3228 /* MatGetFactor for Seq and MPI SELL matrices */ 3229 static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F) 3230 { 3231 Mat B; 3232 PetscErrorCode ierr; 3233 Mat_MUMPS *mumps; 3234 PetscBool isSeqSELL; 3235 PetscMPIInt size; 3236 3237 PetscFunctionBegin; 3238 /* Create the factorization matrix */ 3239 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);CHKERRQ(ierr); 3240 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3241 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3242 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 3243 ierr = MatSetUp(B);CHKERRQ(ierr); 3244 3245 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 3246 3247 B->ops->view = MatView_MUMPS; 3248 B->ops->getinfo = MatGetInfo_MUMPS; 3249 3250 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 3251 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 3252 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 3253 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 3254 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 3255 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 3256 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 3257 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 3258 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 3259 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 3260 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 3261 3262 if (ftype == MAT_FACTOR_LU) { 3263 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3264 B->factortype = MAT_FACTOR_LU; 3265 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 3266 else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 3267 mumps->sym = 0; 3268 ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]);CHKERRQ(ierr); 3269 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 3270 3271 /* set solvertype */ 3272 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 3273 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 3274 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 3275 if (size == 1) { 3276 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3277 B->canuseordering = PETSC_TRUE; 3278 } 3279 B->ops->destroy = MatDestroy_MUMPS; 3280 B->data = (void*)mumps; 3281 3282 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 3283 3284 *F = B; 3285 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3286 PetscFunctionReturn(0); 3287 } 3288 3289 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 3290 { 3291 PetscErrorCode ierr; 3292 3293 PetscFunctionBegin; 3294 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 3295 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 3296 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 3297 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 3298 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 3299 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 3300 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 3301 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 3302 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 3303 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 3304 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);CHKERRQ(ierr); 3305 PetscFunctionReturn(0); 3306 } 3307 3308