/*
 Provides an interface to the MUMPS sparse solver
*/
#include <petscpkg_version.h>
#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sell/mpi/mpisell.h>

/* Pull in the MUMPS C header matching PETSc's scalar configuration (single/double x real/complex) */
EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#include <cmumps_c.h>
#else
#include <zmumps_c.h>
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#include <smumps_c.h>
#else
#include <dmumps_c.h>
#endif
#endif
EXTERN_C_END

/* MUMPS job codes assigned to id.job before each PetscMUMPS_c() call */
#define JOB_INIT -1
#define JOB_FACTSYMBOLIC 1
#define JOB_FACTNUMERIC 2
#define JOB_SOLVE 3
#define JOB_END -2

/* calls to MUMPS: MUMPS_c resolves to the entry point of the flavor selected above */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c cmumps_c
#else
#define MUMPS_c zmumps_c
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c smumps_c
#else
#define MUMPS_c dmumps_c
#endif
#endif

/* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
   number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
   naming convention in PetscMPIInt, PetscBLASInt etc.
*/
typedef MUMPS_INT PetscMUMPSInt;

#if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
#if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
#error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
#endif
#else
#if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */
#error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
#endif
#endif

/* MPI datatype and value range for PetscMUMPSInt (a 32-bit int when MUMPS is not in full 64-bit mode) */
#define MPIU_MUMPSINT MPI_INT
#define PETSC_MUMPS_INT_MAX 2147483647
#define PETSC_MUMPS_INT_MIN -2147483648

/* Cast PetscInt to PetscMUMPSInt.
   Usually there is no overflow since <a> is row/col indices or some small integers*/
PETSC_STATIC_INLINE PetscErrorCode PetscMUMPSIntCast(PetscInt a,PetscMUMPSInt *b)
{
  PetscFunctionBegin;
  /* The range check only fires when PetscInt is 64-bit (PetscDefined(USE_64BIT_INDICES)) and
     via PetscUnlikelyDebug; with 32-bit PetscInt the cast cannot lose information */
  if (PetscDefined(USE_64BIT_INDICES) && PetscUnlikelyDebug(a > PETSC_MUMPS_INT_MAX || a < PETSC_MUMPS_INT_MIN)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
  *b = (PetscMUMPSInt)(a);
  PetscFunctionReturn(0);
}

/* Put these utility routines here since they are only used in this file */
/* Read a PetscMUMPSInt-valued option: parse as PetscInt, then narrow with PetscMUMPSIntCast.
   <value> is only written when the option was actually set; <set> may be NULL. */
PETSC_STATIC_INLINE PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject,const char opt[],const char text[],const char man[],PetscMUMPSInt currentvalue,PetscMUMPSInt *value,PetscBool *set,PetscMUMPSInt lb,PetscMUMPSInt ub)
{
  PetscErrorCode ierr;
  PetscInt       myval;
  PetscBool      myset;
  PetscFunctionBegin;
  /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
  ierr = PetscOptionsInt_Private(PetscOptionsObject,opt,text,man,(PetscInt)currentvalue,&myval,&myset,lb,ub);CHKERRQ(ierr);
  if (myset) {ierr = PetscMUMPSIntCast(myval,value);CHKERRQ(ierr);}
  if (set) *set = myset;
  PetscFunctionReturn(0);
}
/* Convenience wrapper: full PetscMUMPSInt range, captures the local PetscOptionsObject */
#define PetscOptionsMUMPSInt(a,b,c,d,e,f) PetscOptionsMUMPSInt_Private(PetscOptionsObject,a,b,c,d,e,f,PETSC_MUMPS_INT_MIN,PETSC_MUMPS_INT_MAX)

/* if using PETSc OpenMP support, we only call MUMPS on master ranks.
   Before/after the call, we change/restore CPUs the master ranks can run on */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
/* NOTE: relies on an <ierr> variable being in scope at every expansion site */
#define PetscMUMPS_c(mumps) \
  do { \
    if (mumps->use_petsc_omp_support) { \
      if (mumps->is_omp_master) { \
        ierr = PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl);CHKERRQ(ierr); \
        MUMPS_c(&mumps->id); \
        ierr = PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl);CHKERRQ(ierr); \
      } \
      ierr = PetscOmpCtrlBarrier(mumps->omp_ctrl);CHKERRQ(ierr); \
      /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \
         to processes, so we only Bcast info[1], an error code and leave others (since they do not have \
         an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \
         omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \
      */ \
      ierr = MPI_Bcast(mumps->id.infog, 40,MPIU_MUMPSINT, 0,mumps->omp_comm);CHKERRMPI(ierr);\
      ierr = MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL, 0,mumps->omp_comm);CHKERRMPI(ierr);\
      ierr = MPI_Bcast(mumps->id.info, 1, MPIU_MUMPSINT, 0,mumps->omp_comm);CHKERRMPI(ierr);\
    } else { \
      MUMPS_c(&mumps->id); \
    } \
  } while (0)
#else
/* Without PETSc OpenMP support every rank calls MUMPS directly */
#define PetscMUMPS_c(mumps) \
  do { MUMPS_c(&mumps->id); } while (0)
#endif

/* declare MumpsScalar: the MUMPS-side scalar type corresponding to PetscScalar */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MumpsScalar mumps_complex
#else
#define MumpsScalar mumps_double_complex
#endif
#else
#define MumpsScalar PetscScalar
#endif

/* macros s.t.
   indices match MUMPS documentation */
#define ICNTL(I)  icntl[(I)-1]
#define CNTL(I)   cntl[(I)-1]
#define INFOG(I)  infog[(I)-1]
#define INFO(I)   info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I)  rinfo[(I)-1]

/* Private data of a MUMPS factored matrix; stored in Mat->data */
typedef struct Mat_MUMPS Mat_MUMPS;
struct Mat_MUMPS {
  /* the MUMPS control/data structure of the flavor matching PetscScalar */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;
#else
  ZMUMPS_STRUC_C id;
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
#else
  DMUMPS_STRUC_C id;
#endif
#endif

  MatStructure   matstruc;
  PetscMPIInt    myid,petsc_size;
  PetscMUMPSInt  *irn,*jcn;                /* the (i,j,v) triplets passed to mumps. */
  PetscScalar    *val,*val_alloc;          /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
  PetscInt64     nnz;                      /* number of nonzeros. The type is called selective 64-bit in mumps */
  PetscMUMPSInt  sym;                      /* NOTE(review): presumably the MUMPS SYM parameter -- confirm against the factorization setup code */
  MPI_Comm       mumps_comm;
  PetscMUMPSInt  ICNTL9_pre;               /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter     scat_rhs, scat_sol;       /* used by MatSolve() */
  PetscMUMPSInt  ICNTL20;                  /* use centralized (0) or distributed (10) dense RHS */
  PetscMUMPSInt  lrhs_loc,nloc_rhs,*irhs_loc;
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  PetscInt       *rhs_nrow,max_nrhs;
  PetscMPIInt    *rhs_recvcounts,*rhs_disps;
  PetscScalar    *rhs_loc,*rhs_recvbuf;
#endif
  Vec            b_seq,x_seq;
  PetscInt       ninfo,*info;              /* which INFO to display */
  PetscInt       sizeredrhs;
  PetscScalar    *schur_sol;
  PetscInt       schur_sizesol;
  PetscMUMPSInt  *ia_alloc,*ja_alloc;      /* work arrays used for the CSR struct for sparse rhs */
  PetscInt64     cur_ilen,cur_jlen;        /* current len of ia_alloc[], ja_alloc[] */
  PetscErrorCode (*ConvertToTriples)(Mat,PetscInt,MatReuse,Mat_MUMPS*);

  /* stuff used by petsc/mumps OpenMP support*/
  PetscBool      use_petsc_omp_support;
  PetscOmpCtrl   omp_ctrl;                 /* an OpenMP
controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */
  MPI_Comm       petsc_comm,omp_comm;      /* petsc_comm is petsc matrix's comm */
  PetscInt64     *recvcount;               /* a collection of nnz on omp_master */
  PetscMPIInt    tag,omp_comm_size;
  PetscBool      is_omp_master;            /* is this rank the master of omp_comm */
  MPI_Request    *reqs;
};

/* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
   Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
   With 32-bit PetscInt the input arrays are returned as-is; with 64-bit PetscInt the entries are
   narrowed into the reusable buffers mumps->ia_alloc/ja_alloc (grown on demand, never shrunk). */
static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps,PetscInt nrow,PetscInt *ia,PetscInt *ja,PetscMUMPSInt **ia_mumps,PetscMUMPSInt **ja_mumps,PetscMUMPSInt *nnz_mumps)
{
  PetscErrorCode ierr;
  PetscInt       nnz=ia[nrow]-1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */

  PetscFunctionBegin;
#if defined(PETSC_USE_64BIT_INDICES)
  {
    PetscInt i;
    if (nrow+1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
      ierr = PetscFree(mumps->ia_alloc);CHKERRQ(ierr);
      ierr = PetscMalloc1(nrow+1,&mumps->ia_alloc);CHKERRQ(ierr);
      mumps->cur_ilen = nrow+1;
    }
    if (nnz > mumps->cur_jlen) {
      ierr = PetscFree(mumps->ja_alloc);CHKERRQ(ierr);
      ierr = PetscMalloc1(nnz,&mumps->ja_alloc);CHKERRQ(ierr);
      mumps->cur_jlen = nnz;
    }
    for (i=0; i<nrow+1; i++) {ierr = PetscMUMPSIntCast(ia[i],&(mumps->ia_alloc[i]));CHKERRQ(ierr);}
    for (i=0; i<nnz; i++) {ierr = PetscMUMPSIntCast(ja[i],&(mumps->ja_alloc[i]));CHKERRQ(ierr);}
    *ia_mumps = mumps->ia_alloc;
    *ja_mumps = mumps->ja_alloc;
  }
#else
  *ia_mumps = ia;
  *ja_mumps = ja;
#endif
  ierr = PetscMUMPSIntCast(nnz,nnz_mumps);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Free Schur-complement related storage held inside mumps->id and reset the Schur settings */
static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr);
  ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
  ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
  mumps->id.size_schur = 0;
  mumps->id.schur_lld  = 0;
  mumps->id.ICNTL(19)  = 0; /* ICNTL(19)=0: no Schur complement requested */
  PetscFunctionReturn(0);
}

/* solve with rhs in mumps->id.redrhs and return in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS            *mumps=(Mat_MUMPS*)F->data;
  Mat                  S,B,X;
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;
  PetscErrorCode       ierr;

  PetscFunctionBegin;
  ierr = MatFactorFactorizeSchurComplement(F);CHKERRQ(ierr);
  ierr = MatFactorGetSchurComplement(F,&S,&schurstatus);CHKERRQ(ierr);
  /* B wraps id.redrhs (no copy): size_schur x nrhs dense right-hand sides */
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  ierr = MatBindToCPU(B,S->boundtocpu);CHKERRQ(ierr);
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    /* X also wraps id.redrhs, so the triangular solves write the solution in place */
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);CHKERRQ(ierr);
    ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr);
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatMatSolveTranspose(S,B,X);CHKERRQ(ierr);
    } else {
      ierr = MatMatSolve(S,B,X);CHKERRQ(ierr);
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    /* S holds the inverse: the solve is a multiply into the scratch buffer schur_sol (grown on demand) */
    sizesol = mumps->id.nrhs*mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
      ierr = PetscMalloc1(sizesol,&mumps->schur_sol);CHKERRQ(ierr);
      mumps->schur_sizesol = sizesol;
    }
    ierr =
MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);CHKERRQ(ierr);
    ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr);
#endif
    ierr = MatProductCreateWithMat(S,B,NULL,X);CHKERRQ(ierr);
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatProductSetType(X,MATPRODUCT_AtB);CHKERRQ(ierr);
    } else {
      ierr = MatProductSetType(X,MATPRODUCT_AB);CHKERRQ(ierr);
    }
    ierr = MatProductSetFromOptions(X);CHKERRQ(ierr);
    ierr = MatProductSymbolic(X);CHKERRQ(ierr);
    ierr = MatProductNumeric(X);CHKERRQ(ierr);

    /* copy the result back into id.redrhs (B aliases it) */
    ierr = MatCopy(X,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %d",F->schur_status);
  }
  ierr = MatFactorRestoreSchurComplement(F,&S,schurstatus);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = MatDestroy(&X);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Drive the MUMPS reduced-RHS (Schur) protocol around a solve:
   expansion=PETSC_FALSE prepares the condensation step (ICNTL(26)=1),
   expansion=PETSC_TRUE solves the Schur system and runs the expansion step (ICNTL(26)=2). */
static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS     *mumps=(Mat_MUMPS*)F->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
    PetscFunctionReturn(0);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = mumps->id.size_schur;
      ierr = PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);CHKERRQ(ierr);
      mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs;
    }
    mumps->id.ICNTL(26) = 1; /* condensation phase */
  } else { /* prepare for the expansion step */
    /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
    ierr = MatMumpsSolveSchur_Private(F);CHKERRQ(ierr);
    mumps->id.ICNTL(26) = 2; /* expansion phase */
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
    if (mumps->id.nrhs > 1) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = 0;
      mumps->sizeredrhs = 0;
    }
  }
  PetscFunctionReturn(0);
}

/*
  MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz]

  input:
    A       - matrix in aij,baij or sbaij format
    shift   - 0: C style output triple; 1: Fortran style output triple.
    reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
              MAT_REUSE_MATRIX: only the values in v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and col index, matrix values (matrix triples)

  The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
  freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means
  that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
*/

/* SeqAIJ -> MUMPS assembled (centralized) triples; values are used in place, only row/col are built */
PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscScalar *av;
  const PetscInt    *ai,*aj,*ajj,M=A->rmap->n;
  PetscInt64        nz,rnz,i,j,k;
  PetscErrorCode    ierr;
  PetscMUMPSInt     *row,*col;
  Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;

  PetscFunctionBegin;
  ierr = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr);
  /* the AIJ value array is passed to MUMPS directly; no val_alloc buffer is needed here */
  mumps->val = (PetscScalar*)av;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz;
    ai   = aa->i;
    aj   = aa->j;
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    for (i=k=0; i<M; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        ierr = PetscMUMPSIntCast(i+shift,&row[k]);CHKERRQ(ierr);
        ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[k]);CHKERRQ(ierr);
        k++;
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* SeqSELL -> MUMPS triples; rows are recovered from the slice structure (slices of height 8, see the &0x07 wrap) */
PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  PetscErrorCode ierr;
  PetscInt64     nz,i,j,k,r;
  Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
  PetscMUMPSInt  *row,*col;

  PetscFunctionBegin;
  mumps->val = a->val;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = a->sliidx[a->totalslices];
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    for (i=k=0; i<a->totalslices; i++) {
      for (j=a->sliidx[i],r=0; j<a->sliidx[i+1]; j++,r=((r+1)&0x07)) {
        ierr = PetscMUMPSIntCast(8*i+r+shift,&row[k++]);CHKERRQ(ierr);
      }
    }
    for (i=0;i<nz;i++) {ierr = PetscMUMPSIntCast(a->colidx[i]+shift,&col[i]);CHKERRQ(ierr);}
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscFunctionReturn(0);
}

/* SeqBAIJ -> MUMPS triples; each bs x bs block is expanded into scalar entries */
PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  Mat_SeqBAIJ    *aa=(Mat_SeqBAIJ*)A->data;
  const PetscInt
*ai,*aj,*ajj,bs2 = aa->bs2; 418 PetscInt64 M,nz,idx=0,rnz,i,j,k,m; 419 PetscInt bs; 420 PetscErrorCode ierr; 421 PetscMUMPSInt *row,*col; 422 423 PetscFunctionBegin; 424 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 425 M = A->rmap->N/bs; 426 mumps->val = aa->a; 427 if (reuse == MAT_INITIAL_MATRIX) { 428 ai = aa->i; aj = aa->j; 429 nz = bs2*aa->nz; 430 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 431 for (i=0; i<M; i++) { 432 ajj = aj + ai[i]; 433 rnz = ai[i+1] - ai[i]; 434 for (k=0; k<rnz; k++) { 435 for (j=0; j<bs; j++) { 436 for (m=0; m<bs; m++) { 437 ierr = PetscMUMPSIntCast(i*bs + m + shift,&row[idx]);CHKERRQ(ierr); 438 ierr = PetscMUMPSIntCast(bs*ajj[k] + j + shift,&col[idx]);CHKERRQ(ierr); 439 idx++; 440 } 441 } 442 } 443 } 444 mumps->irn = row; 445 mumps->jcn = col; 446 mumps->nnz = nz; 447 } 448 PetscFunctionReturn(0); 449 } 450 451 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 452 { 453 const PetscInt *ai, *aj,*ajj; 454 PetscInt bs; 455 PetscInt64 nz,rnz,i,j,k,m; 456 PetscErrorCode ierr; 457 PetscMUMPSInt *row,*col; 458 PetscScalar *val; 459 Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)A->data; 460 const PetscInt bs2=aa->bs2,mbs=aa->mbs; 461 #if defined(PETSC_USE_COMPLEX) 462 PetscBool hermitian; 463 #endif 464 465 PetscFunctionBegin; 466 #if defined(PETSC_USE_COMPLEX) 467 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 468 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 469 #endif 470 ai = aa->i; 471 aj = aa->j; 472 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 473 if (reuse == MAT_INITIAL_MATRIX) { 474 nz = aa->nz; 475 ierr = PetscMalloc2(bs2*nz,&row,bs2*nz,&col);CHKERRQ(ierr); 476 if (bs>1) { 477 ierr = PetscMalloc1(bs2*nz,&mumps->val_alloc);CHKERRQ(ierr); 478 mumps->val = mumps->val_alloc; 479 } else { 480 mumps->val = aa->a; 481 } 482 mumps->irn = row; 483 mumps->jcn = col; 484 } else { 485 if 
(bs == 1) mumps->val = aa->a; 486 row = mumps->irn; 487 col = mumps->jcn; 488 } 489 val = mumps->val; 490 491 nz = 0; 492 if (bs>1) { 493 for (i=0; i<mbs; i++) { 494 rnz = ai[i+1] - ai[i]; 495 ajj = aj + ai[i]; 496 for (j=0; j<rnz; j++) { 497 for (k=0; k<bs; k++) { 498 for (m=0; m<bs; m++) { 499 if (ajj[j]>i || k>=m) { 500 if (reuse == MAT_INITIAL_MATRIX) { 501 ierr = PetscMUMPSIntCast(i*bs + m + shift,&row[nz]);CHKERRQ(ierr); 502 ierr = PetscMUMPSIntCast(ajj[j]*bs + k + shift,&col[nz]);CHKERRQ(ierr); 503 } 504 val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs]; 505 } 506 } 507 } 508 } 509 } 510 } else if (reuse == MAT_INITIAL_MATRIX) { 511 for (i=0; i<mbs; i++) { 512 rnz = ai[i+1] - ai[i]; 513 ajj = aj + ai[i]; 514 for (j=0; j<rnz; j++) { 515 ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr); 516 ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr); 517 nz++; 518 } 519 } 520 if (PetscUnlikely(nz != aa->nz)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT,nz,aa->nz); 521 } 522 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 523 PetscFunctionReturn(0); 524 } 525 526 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 527 { 528 const PetscInt *ai,*aj,*ajj,*adiag,M=A->rmap->n; 529 PetscInt64 nz,rnz,i,j; 530 const PetscScalar *av,*v1; 531 PetscScalar *val; 532 PetscErrorCode ierr; 533 PetscMUMPSInt *row,*col; 534 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 535 PetscBool missing; 536 #if defined(PETSC_USE_COMPLEX) 537 PetscBool hermitian; 538 #endif 539 540 PetscFunctionBegin; 541 #if defined(PETSC_USE_COMPLEX) 542 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 543 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 544 #endif 545 ierr = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr); 546 ai = aa->i; aj = aa->j; 547 adiag = aa->diag; 548 ierr = 
MatMissingDiagonal_SeqAIJ(A,&missing,NULL);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    /* count nz in the upper triangular part of A */
    nz = 0;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          /* no diagonal entry in this row: count upper-triangular entries one by one */
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            nz++;
          }
        } else {
          nz += ai[i+1] - adiag[i];
        }
      }
    } else {
      for (i=0; i<M; i++) nz += ai[i+1] - adiag[i];
    }
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;

    /* second pass: fill row/col/val */
    nz = 0;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
            ierr = PetscMUMPSIntCast(aj[j]+shift,&col[nz]);CHKERRQ(ierr);
            val[nz] = av[j];
            nz++;
          }
        } else {
          rnz = ai[i+1] - adiag[i];
          ajj = aj + adiag[i];
          v1  = av + adiag[i];
          for (j=0; j<rnz; j++) {
            ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
            ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i=0; i<M; i++) {
        rnz = ai[i+1] - adiag[i];
        ajj = aj + adiag[i];
        v1  = av + adiag[i];
        for (j=0; j<rnz; j++) {
          ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
          ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
          val[nz++] = v1[j];
        }
      }
    }
  } else {
    /* MAT_REUSE_MATRIX: pattern unchanged, refresh values only */
    nz  = 0;
    val = mumps->val;
    if (missing) {
      for (i=0; i <M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            val[nz++] = av[j];
          }
        } else {
          rnz = ai[i+1] - adiag[i];
          v1  = av + adiag[i];
          for (j=0; j<rnz; j++) {
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for
(i=0; i <M; i++) { 627 rnz = ai[i+1] - adiag[i]; 628 v1 = av + adiag[i]; 629 for (j=0; j<rnz; j++) { 630 val[nz++] = v1[j]; 631 } 632 } 633 } 634 } 635 ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr); 636 PetscFunctionReturn(0); 637 } 638 639 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 640 { 641 PetscErrorCode ierr; 642 const PetscInt *ai,*aj,*bi,*bj,*garray,*ajj,*bjj; 643 PetscInt bs; 644 PetscInt64 rstart,nz,i,j,k,m,jj,irow,countA,countB; 645 PetscMUMPSInt *row,*col; 646 const PetscScalar *av,*bv,*v1,*v2; 647 PetscScalar *val; 648 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data; 649 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)(mat->A)->data; 650 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 651 const PetscInt bs2=aa->bs2,mbs=aa->mbs; 652 #if defined(PETSC_USE_COMPLEX) 653 PetscBool hermitian; 654 #endif 655 656 PetscFunctionBegin; 657 #if defined(PETSC_USE_COMPLEX) 658 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 659 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 660 #endif 661 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 662 rstart = A->rmap->rstart; 663 ai = aa->i; 664 aj = aa->j; 665 bi = bb->i; 666 bj = bb->j; 667 av = aa->a; 668 bv = bb->a; 669 670 garray = mat->garray; 671 672 if (reuse == MAT_INITIAL_MATRIX) { 673 nz = (aa->nz+bb->nz)*bs2; /* just a conservative estimate */ 674 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 675 ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr); 676 /* can not decide the exact mumps->nnz now because of the SBAIJ */ 677 mumps->irn = row; 678 mumps->jcn = col; 679 mumps->val = mumps->val_alloc = val; 680 } else { 681 val = mumps->val; 682 } 683 684 jj = 0; irow = rstart; 685 for (i=0; i<mbs; i++) { 686 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 687 countA = ai[i+1] - ai[i]; 688 countB = bi[i+1] - bi[i]; 689 bjj = bj + bi[i]; 690 v1 = av + ai[i]*bs2; 
    v2 = bv + bi[i]*bs2;

    if (bs>1) {
      /* A-part */
      for (j=0; j<countA; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            /* keep only scalar entries on or above the global diagonal */
            if (rstart + ajj[j]*bs>irow || k>=m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                ierr = PetscMUMPSIntCast(irow + m + shift,&row[jj]);CHKERRQ(ierr);
                ierr = PetscMUMPSIntCast(rstart + ajj[j]*bs + k + shift,&col[jj]);CHKERRQ(ierr);
              }
              val[jj++] = v1[j*bs2 + m + k*bs];
            }
          }
        }
      }

      /* B-part */
      for (j=0; j < countB; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            if (reuse == MAT_INITIAL_MATRIX) {
              ierr = PetscMUMPSIntCast(irow + m + shift,&row[jj]);CHKERRQ(ierr);
              ierr = PetscMUMPSIntCast(garray[bjj[j]]*bs + k + shift,&col[jj]);CHKERRQ(ierr);
            }
            val[jj++] = v2[j*bs2 + m + k*bs];
          }
        }
      }
    } else {
      /* A-part */
      for (j=0; j<countA; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
          ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr);
        }
        val[jj++] = v1[j];
      }

      /* B-part */
      for (j=0; j < countB; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
          ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr);
        }
        val[jj++] = v2[j];
      }
    }
    irow+=bs;
  }
  /* exact count known only after the upper-triangular filtering above */
  mumps->nnz = jj;
  PetscFunctionReturn(0);
}

/* MPIAIJ -> MUMPS triples (global indices): diagonal block rows first, then off-diagonal
   entries with columns mapped through garray. Values are copied into a private buffer. */
PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt64        rstart,nz,i,j,jj,irow,countA,countB;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av, *bv,*v1,*v2;
  PetscScalar       *val;
  Mat               Ad,Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;

  PetscFunctionBegin;
  ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
  ierr =
MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr);

  aa = (Mat_SeqAIJ*)(Ad)->data;
  bb = (Mat_SeqAIJ*)(Ao)->data;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
        ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr);
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j=0; j < countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
        ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr);
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* MPIBAIJ -> MUMPS triples (global indices): bs x bs blocks of both the diagonal (mat->A) and
   off-diagonal (mat->B) parts are expanded into scalar entries; B columns map through garray. */
PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ*)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
  const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj;
  const PetscInt    *garray =
mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart;
  const PetscInt    bs2=mat->bs2;
  PetscErrorCode    ierr;
  PetscInt          bs;
  PetscInt64        nz,i,j,k,n,jj,irow,countA,countB,idx;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2;
  PetscScalar       *val;

  PetscFunctionBegin;
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = bs2*(aa->nz + bb->nz);
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<mbs; i++) {
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    ajj    = aj + ai[i];
    bjj    = bj + bi[i];
    v1     = av + bs2*ai[i];
    v2     = bv + bs2*bi[i];

    idx = 0;
    /* A-part */
    for (k=0; k<countA; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            ierr = PetscMUMPSIntCast(irow + n + shift,&row[jj]);CHKERRQ(ierr);
            ierr = PetscMUMPSIntCast(rstart + bs*ajj[k] + j + shift,&col[jj]);CHKERRQ(ierr);
          }
          val[jj++] = v1[idx++];
        }
      }
    }

    idx = 0;
    /* B-part */
    for (k=0; k<countB; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            ierr = PetscMUMPSIntCast(irow + n + shift,&row[jj]);CHKERRQ(ierr);
            ierr = PetscMUMPSIntCast(bs*garray[bjj[k]] + j + shift,&col[jj]);CHKERRQ(ierr);
          }
          val[jj++] = v2[idx++];
        }
      }
    }
    irow += bs;
  }
  PetscFunctionReturn(0);
}

/* MPIAIJ -> MUMPS symmetric triples: keep the diagonal-and-above part of the diagonal block
   (via aa->diag) and only off-diagonal entries whose global column exceeds rstart. */
PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt64        rstart,nz,nza,nzb,i,j,jj,irow,countA,countB;
  PetscMUMPSInt     *row,*col;
  const
PetscScalar *av, *bv,*v1,*v2; 894 PetscScalar *val; 895 Mat Ad,Ao; 896 Mat_SeqAIJ *aa; 897 Mat_SeqAIJ *bb; 898 #if defined(PETSC_USE_COMPLEX) 899 PetscBool hermitian; 900 #endif 901 902 PetscFunctionBegin; 903 #if defined(PETSC_USE_COMPLEX) 904 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 905 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 906 #endif 907 ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr); 908 ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr); 909 ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr); 910 911 aa = (Mat_SeqAIJ*)(Ad)->data; 912 bb = (Mat_SeqAIJ*)(Ao)->data; 913 ai = aa->i; 914 aj = aa->j; 915 adiag = aa->diag; 916 bi = bb->i; 917 bj = bb->j; 918 919 rstart = A->rmap->rstart; 920 921 if (reuse == MAT_INITIAL_MATRIX) { 922 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 923 nzb = 0; /* num of upper triangular entries in mat->B */ 924 for (i=0; i<m; i++) { 925 nza += (ai[i+1] - adiag[i]); 926 countB = bi[i+1] - bi[i]; 927 bjj = bj + bi[i]; 928 for (j=0; j<countB; j++) { 929 if (garray[bjj[j]] > rstart) nzb++; 930 } 931 } 932 933 nz = nza + nzb; /* total nz of upper triangular part of mat */ 934 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 935 ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr); 936 mumps->nnz = nz; 937 mumps->irn = row; 938 mumps->jcn = col; 939 mumps->val = mumps->val_alloc = val; 940 } else { 941 val = mumps->val; 942 } 943 944 jj = 0; irow = rstart; 945 for (i=0; i<m; i++) { 946 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 947 v1 = av + adiag[i]; 948 countA = ai[i+1] - adiag[i]; 949 countB = bi[i+1] - bi[i]; 950 bjj = bj + bi[i]; 951 v2 = bv + bi[i]; 952 953 /* A-part */ 954 for (j=0; j<countA; j++) { 955 if (reuse == MAT_INITIAL_MATRIX) { 956 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 957 ierr = PetscMUMPSIntCast(rstart + 
ajj[j] + shift,&col[jj]);CHKERRQ(ierr); 958 } 959 val[jj++] = v1[j]; 960 } 961 962 /* B-part */ 963 for (j=0; j < countB; j++) { 964 if (garray[bjj[j]] > rstart) { 965 if (reuse == MAT_INITIAL_MATRIX) { 966 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 967 ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr); 968 } 969 val[jj++] = v2[j]; 970 } 971 } 972 irow++; 973 } 974 ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr); 975 ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr); 976 PetscFunctionReturn(0); 977 } 978 979 PetscErrorCode MatDestroy_MUMPS(Mat A) 980 { 981 PetscErrorCode ierr; 982 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 983 984 PetscFunctionBegin; 985 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 986 ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr); 987 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 988 ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr); 989 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 990 ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr); 991 ierr = PetscFree2(mumps->irn,mumps->jcn);CHKERRQ(ierr); 992 ierr = PetscFree(mumps->val_alloc);CHKERRQ(ierr); 993 ierr = PetscFree(mumps->info);CHKERRQ(ierr); 994 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 995 mumps->id.job = JOB_END; 996 PetscMUMPS_c(mumps); 997 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d",mumps->id.INFOG(1)); 998 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 999 if (mumps->use_petsc_omp_support) { 1000 ierr = PetscOmpCtrlDestroy(&mumps->omp_ctrl);CHKERRQ(ierr); 1001 ierr = PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);CHKERRQ(ierr); 1002 ierr = PetscFree3(mumps->rhs_nrow,mumps->rhs_recvcounts,mumps->rhs_disps);CHKERRQ(ierr); 1003 } 1004 #endif 1005 ierr = PetscFree(mumps->ia_alloc);CHKERRQ(ierr); 1006 ierr = PetscFree(mumps->ja_alloc);CHKERRQ(ierr); 1007 ierr = PetscFree(mumps->recvcount);CHKERRQ(ierr); 1008 
ierr = PetscFree(mumps->reqs);CHKERRQ(ierr); 1009 ierr = PetscFree(mumps->irhs_loc);CHKERRQ(ierr); 1010 if (mumps->mumps_comm != MPI_COMM_NULL) {ierr = PetscCommRestoreComm(PetscObjectComm((PetscObject)A),&mumps->mumps_comm);CHKERRQ(ierr);} 1011 ierr = PetscFree(A->data);CHKERRQ(ierr); 1012 1013 /* clear composed functions */ 1014 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr); 1015 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr); 1016 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr); 1017 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr); 1018 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);CHKERRQ(ierr); 1019 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr); 1020 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);CHKERRQ(ierr); 1021 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);CHKERRQ(ierr); 1022 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);CHKERRQ(ierr); 1023 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);CHKERRQ(ierr); 1024 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);CHKERRQ(ierr); 1025 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);CHKERRQ(ierr); 1026 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);CHKERRQ(ierr); 1027 PetscFunctionReturn(0); 1028 } 1029 1030 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. 
 */
static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A,PetscInt nrhs,const PetscScalar *array)
{
  PetscErrorCode    ierr;
  Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
  const PetscMPIInt ompsize=mumps->omp_comm_size;
  PetscInt          i,m,M,rstart;

  PetscFunctionBegin;
  ierr = MatGetSize(A,&M,NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
  /* irhs_loc entries are PetscMUMPSInt, so global indices must fit in 32 bits */
  if (M > PETSC_MUMPS_INT_MAX) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
  if (ompsize == 1) {
    /* No OpenMP aggregation: each rank hands its own contiguous rows directly to MUMPS */
    if (!mumps->irhs_loc) {
      mumps->nloc_rhs = m;
      ierr = PetscMalloc1(m,&mumps->irhs_loc);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
      for (i=0; i<m; i++) mumps->irhs_loc[i] = rstart+i+1; /* use 1-based indices */
    }
    mumps->id.rhs_loc = (MumpsScalar*)array;
  } else {
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
    const PetscInt *ranges;
    PetscMPIInt    j,k,sendcount,*petsc_ranks,*omp_ranks;
    MPI_Group      petsc_group,omp_group;
    PetscScalar    *recvbuf=NULL;

    if (mumps->is_omp_master) {
      /* Lazily initialize the omp stuff for distributed rhs */
      if (!mumps->irhs_loc) {
        ierr = PetscMalloc2(ompsize,&omp_ranks,ompsize,&petsc_ranks);CHKERRQ(ierr);
        ierr = PetscMalloc3(ompsize,&mumps->rhs_nrow,ompsize,&mumps->rhs_recvcounts,ompsize,&mumps->rhs_disps);CHKERRQ(ierr);
        /* Translate omp_comm ranks to petsc_comm ranks so we can look up ownership ranges */
        ierr = MPI_Comm_group(mumps->petsc_comm,&petsc_group);CHKERRMPI(ierr);
        ierr = MPI_Comm_group(mumps->omp_comm,&omp_group);CHKERRMPI(ierr);
        for (j=0; j<ompsize; j++) omp_ranks[j] = j;
        ierr = MPI_Group_translate_ranks(omp_group,ompsize,omp_ranks,petsc_group,petsc_ranks);CHKERRMPI(ierr);

        /* Populate mumps->irhs_loc[], rhs_nrow[] */
        mumps->nloc_rhs = 0;
        ierr = MatGetOwnershipRanges(A,&ranges);CHKERRQ(ierr);
        for (j=0; j<ompsize; j++) {
          mumps->rhs_nrow[j] = ranges[petsc_ranks[j]+1] - ranges[petsc_ranks[j]];
          mumps->nloc_rhs   += mumps->rhs_nrow[j];
        }
        ierr = PetscMalloc1(mumps->nloc_rhs,&mumps->irhs_loc);CHKERRQ(ierr);
        for (j=k=0; j<ompsize; j++) {
          for (i=ranges[petsc_ranks[j]]; i<ranges[petsc_ranks[j]+1]; i++,k++) mumps->irhs_loc[k] = i+1; /* uses 1-based indices */
        }

        ierr = PetscFree2(omp_ranks,petsc_ranks);CHKERRQ(ierr);
        ierr = MPI_Group_free(&petsc_group);CHKERRMPI(ierr);
        ierr = MPI_Group_free(&omp_group);CHKERRMPI(ierr);
      }

      /* Realloc buffers when current nrhs is bigger than what we have met */
      if (nrhs > mumps->max_nrhs) {
        ierr = PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);CHKERRQ(ierr);
        ierr = PetscMalloc2(mumps->nloc_rhs*nrhs,&mumps->rhs_loc,mumps->nloc_rhs*nrhs,&mumps->rhs_recvbuf);CHKERRQ(ierr);
        mumps->max_nrhs = nrhs;
      }

      /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
      for (j=0; j<ompsize; j++) {ierr = PetscMPIIntCast(mumps->rhs_nrow[j]*nrhs,&mumps->rhs_recvcounts[j]);CHKERRQ(ierr);}
      mumps->rhs_disps[0] = 0;
      for (j=1; j<ompsize; j++) {
        mumps->rhs_disps[j] = mumps->rhs_disps[j-1] + mumps->rhs_recvcounts[j-1];
        if (mumps->rhs_disps[j] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscMPIInt overflow!");
      }
      recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
    }

    ierr = PetscMPIIntCast(m*nrhs,&sendcount);CHKERRQ(ierr);
    ierr = MPI_Gatherv(array,sendcount,MPIU_SCALAR,recvbuf,mumps->rhs_recvcounts,mumps->rhs_disps,MPIU_SCALAR,0,mumps->omp_comm);CHKERRMPI(ierr);

    if (mumps->is_omp_master) {
      if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
        PetscScalar *dst,*dstbase = mumps->rhs_loc;
        for (j=0; j<ompsize; j++) {
          const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
          dst = dstbase;
          for (i=0; i<nrhs; i++) {
            ierr = PetscArraycpy(dst,src,mumps->rhs_nrow[j]);CHKERRQ(ierr);
            src += mumps->rhs_nrow[j];
            dst += mumps->nloc_rhs;
          }
          dstbase += mumps->rhs_nrow[j];
        }
      }
      mumps->id.rhs_loc = (MumpsScalar*)mumps->rhs_loc;
    }
#endif /* PETSC_HAVE_OPENMP_SUPPORT */
  }
  mumps->id.nrhs     = nrhs;
  mumps->id.nloc_rhs = mumps->nloc_rhs;
  mumps->id.lrhs_loc = mumps->nloc_rhs;
  mumps->id.irhs_loc = mumps->irhs_loc;
  PetscFunctionReturn(0);
}

/* Solve A x = b (or A^T x = b, controlled by ICNTL(9) set by the caller) with the
   MUMPS factorization stored in A. Handles centralized vs distributed RHS, the
   optional Schur condensation/expansion steps, and scattering the distributed
   MUMPS solution back into the PETSc vector x. */
PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
  const PetscScalar *rarray = NULL;
  PetscScalar       *array;
  IS                is_iden,is_petsc;
  PetscErrorCode    ierr;
  PetscInt          i;
  PetscBool         second_solve = PETSC_FALSE;
  static PetscBool  cite1 = PETSC_FALSE,cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1);CHKERRQ(ierr);
  ierr = PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",&cite2);CHKERRQ(ierr);

  if (A->factorerrortype) {
    /* Factorization previously failed: flag the solution as invalid rather than calling MUMPS */
    ierr = PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    ierr = VecSetInf(x);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  mumps->id.nrhs = 1;
  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      ierr = VecGetArrayRead(b,&rarray);CHKERRQ(ierr);
      ierr = MatMumpsSetUpDistRHSInfo(A,1,rarray);CHKERRQ(ierr);
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/
      ierr = VecScatterBegin(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      if (!mumps->myid) {
        ierr = VecGetArray(mumps->b_seq,&array);CHKERRQ(ierr);
        mumps->id.rhs = (MumpsScalar*)array;
      }
    }
  } else {  /* petsc_size == 1 */
    mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
    /* MUMPS solves in place in rhs, so copy b into x and let MUMPS overwrite it */
    ierr = VecCopy(b,x);CHKERRQ(ierr);
    ierr = VecGetArray(x,&array);CHKERRQ(ierr);
    mumps->id.rhs = (MumpsScalar*)array;
  }

  /*
     handle condensation step of Schur complement (if any)
     We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
     According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase
     Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
     This requires an extra call to PetscMUMPS_c and the computation of the factors for S
  */
  if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
    if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc");
    second_solve = PETSC_TRUE;
    ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
  }
  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));

  /* handle expansion step of Schur complement (if any) */
  if (second_solve) {
    ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
  }

  if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
    if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
      /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */
      ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
    }
    if (!mumps->scat_sol) { /* create scatter scat_sol */
      PetscInt *isol2_loc=NULL;
      ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */
      ierr = PetscMalloc1(mumps->id.lsol_loc,&isol2_loc);CHKERRQ(ierr);
      for (i=0; i<mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i]-1; /* change Fortran style to C style */
      ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,isol2_loc,PETSC_OWN_POINTER,&is_petsc);CHKERRQ(ierr); /* to */
      ierr = VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr);
      ierr = ISDestroy(&is_iden);CHKERRQ(ierr);
      ierr = ISDestroy(&is_petsc);CHKERRQ(ierr);
      mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9)
 */
    }

    ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }

  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      ierr = VecRestoreArrayRead(b,&rarray);CHKERRQ(ierr);
    } else if (!mumps->myid) {
      ierr = VecRestoreArray(mumps->b_seq,&array);CHKERRQ(ierr);
    }
  } else {ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);}

  ierr = PetscLogFlops(2.0*mumps->id.RINFO(3));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Solve A^T x = b by toggling MUMPS ICNTL(9) (transpose solve) around a call to
   MatSolve_MUMPS(), then restoring the default forward-solve setting. */
PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mumps->id.ICNTL(9) = 0; /* 0: solve with A^T */
  ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr);
  mumps->id.ICNTL(9) = 1; /* restore default: solve with A */
  PetscFunctionReturn(0);
}

/* Solve A X = B for many right-hand sides. X must be MATDENSE; B is either dense
   (same row distribution as X) or the transpose wrapper of an AIJ matrix holding the
   sparse RHS in compressed-column form (see MatMatTransposeSolve_MUMPS()). */
PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X)
{
  PetscErrorCode    ierr;
  Mat               Bt = NULL;
  PetscBool         denseX,denseB,flg,flgT;
  Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
  PetscInt          i,nrhs,M;
  PetscScalar       *array;
  const PetscScalar *rbray;
  PetscInt          lsol_loc,nlsol_loc,*idxx,iidx = 0;
  PetscMUMPSInt     *isol_loc,*isol_loc_save;
  PetscScalar       *bray,*sol_loc,*sol_loc_save;
  IS                is_to,is_from;
  PetscInt          k,proc,j,m,myrstart;
  const PetscInt    *rstart;
  Vec               v_mpi,msol_loc;
  VecScatter        scat_sol;
  Vec               b_seq;
  VecScatter        scat_rhs;
  PetscScalar       *aa;
  PetscInt          spnr,*ia,*ja;
  Mat_MPIAIJ        *b = NULL;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompareAny((PetscObject)X,&denseX,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (!denseX) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");

  ierr = PetscObjectTypeCompareAny((PetscObject)B,&denseB,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (denseB) {
    if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution");
    mumps->id.ICNTL(20)= 0; /* dense RHS */
  } else { /* sparse B */
    if (X == B) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_IDN,"X and B must be different matrices");
    ierr = PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);CHKERRQ(ierr);
    if (flgT) { /* input B is transpose of actual RHS matrix,
                   because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
      ierr = MatTransposeGetMat(B,&Bt);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix");
    mumps->id.ICNTL(20)= 1; /* sparse RHS */
  }

  ierr = MatGetSize(B,&M,&nrhs);CHKERRQ(ierr);
  mumps->id.nrhs = nrhs;
  mumps->id.lrhs = M;
  mumps->id.rhs  = NULL;

  if (mumps->petsc_size == 1) {
    PetscScalar *aa;
    PetscInt    spnr,*ia,*ja;
    PetscBool   second_solve = PETSC_FALSE;

    ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
    mumps->id.rhs = (MumpsScalar*)array;

    if (denseB) {
      /* copy B to X; MUMPS overwrites the rhs array with the solution */
      ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr);
      ierr = PetscArraycpy(array,rbray,M*nrhs);CHKERRQ(ierr);
      ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr);
    } else { /* sparse B */
      ierr = MatSeqAIJGetArray(Bt,&aa);CHKERRQ(ierr);
      ierr = MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr);
      mumps->id.rhs_sparse = (MumpsScalar*)aa;
    }
    /* handle condensation step of Schur complement (if any) */
    if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
      second_solve = PETSC_TRUE;
      ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
    }
    /* solve phase */
    /*-------------*/
    mumps->id.job = JOB_SOLVE;
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));

    /* handle expansion step of Schur complement (if any) */
    if (second_solve) {
      ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
    }
    if (!denseB) { /* sparse B */
      ierr = MatSeqAIJRestoreArray(Bt,&aa);CHKERRQ(ierr);
      ierr = MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
    ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/
  if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc");

  /* create msol_loc to hold mumps local solution */
  isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
  sol_loc_save  = (PetscScalar*)mumps->id.sol_loc;

  lsol_loc  = mumps->id.lsol_loc;
  nlsol_loc = nrhs*lsol_loc; /* length of sol_loc */
  ierr = PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);CHKERRQ(ierr);
  mumps->id.sol_loc  = (MumpsScalar*)sol_loc;
  mumps->id.isol_loc = isol_loc;

  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);CHKERRQ(ierr);

  if (denseB) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr);
      ierr = MatMumpsSetUpDistRHSInfo(A,nrhs,rbray);CHKERRQ(ierr);
      ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr);
      ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,NULL,&v_mpi);CHKERRQ(ierr);
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
      /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in
         very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank
         0, re-arrange B into desired order, which is a local operation.
       */

      /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
      /* wrap dense rhs matrix B into a vector v_mpi */
      ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr);
      ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
      ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr);

      /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
      if (!mumps->myid) {
        PetscInt *idx;
        /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
        ierr = PetscMalloc1(nrhs*M,&idx);CHKERRQ(ierr);
        ierr = MatGetOwnershipRanges(B,&rstart);CHKERRQ(ierr);
        k = 0;
        for (proc=0; proc<mumps->petsc_size; proc++) {
          for (j=0; j<nrhs; j++) {
            for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i;
          }
        }

        ierr = VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);CHKERRQ(ierr);
        ierr = ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr);
        ierr = ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);CHKERRQ(ierr);
      } else {
        ierr = VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);CHKERRQ(ierr);
        ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);CHKERRQ(ierr);
        ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);CHKERRQ(ierr);
      }
      ierr = VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);CHKERRQ(ierr);
      ierr = VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = ISDestroy(&is_to);CHKERRQ(ierr);
      ierr = ISDestroy(&is_from);CHKERRQ(ierr);
      ierr = VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      if (!mumps->myid) { /* define rhs on the host */
        ierr = VecGetArray(b_seq,&bray);CHKERRQ(ierr);
        mumps->id.rhs = (MumpsScalar*)bray;
        ierr = VecRestoreArray(b_seq,&bray);CHKERRQ(ierr);
      }
    }
  } else { /* sparse B */
    b = (Mat_MPIAIJ*)Bt->data;

    /* wrap dense X into a vector v_mpi */
    ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr);
    ierr = MatDenseGetArray(X,&bray);CHKERRQ(ierr);
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(X,&bray);CHKERRQ(ierr);

    if (!mumps->myid) {
      ierr = MatSeqAIJGetArray(b->A,&aa);CHKERRQ(ierr);
      ierr = MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr);
      mumps->id.rhs_sparse = (MumpsScalar*)aa;
    } else {
      mumps->id.irhs_ptr    = NULL;
      mumps->id.irhs_sparse = NULL;
      mumps->id.nz_rhs      = 0;
      mumps->id.rhs_sparse  = NULL;
    }
  }

  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));

  /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
  ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
  ierr = VecPlaceArray(v_mpi,array);CHKERRQ(ierr);

  /* create scatter scat_sol */
  ierr = MatGetOwnershipRanges(X,&rstart);CHKERRQ(ierr);
  /* iidx: index for scatter mumps solution to petsc X */

  ierr = ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);CHKERRQ(ierr);
  ierr = PetscMalloc1(nlsol_loc,&idxx);CHKERRQ(ierr);
  for (i=0; i<lsol_loc; i++) {
    isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */

    /* locate which rank owns global row isol_loc[i] in X's layout */
    for (proc=0; proc<mumps->petsc_size; proc++) {
      if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) {
        myrstart = rstart[proc];
        k        = isol_loc[i] - myrstart;        /* local index on 1st column of petsc vector X */
        iidx     = k + myrstart*nrhs;             /* maps mumps isol_loc[i] to petsc index in X */
        m        = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */
        break;
      }
    }

    for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m;
  }
  ierr = ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr);
  ierr = VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);CHKERRQ(ierr);
  ierr = VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = ISDestroy(&is_from);CHKERRQ(ierr);
  ierr = ISDestroy(&is_to);CHKERRQ(ierr);
  ierr = VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);

  /* free spaces */
  mumps->id.sol_loc  = (MumpsScalar*)sol_loc_save;
  mumps->id.isol_loc = isol_loc_save;

  ierr = PetscFree2(sol_loc,isol_loc);CHKERRQ(ierr);
  ierr = PetscFree(idxx);CHKERRQ(ierr);
  ierr = VecDestroy(&msol_loc);CHKERRQ(ierr);
  ierr = VecDestroy(&v_mpi);CHKERRQ(ierr);
  if (!denseB) {
    if (!mumps->myid) {
      b = (Mat_MPIAIJ*)Bt->data;
      ierr = MatSeqAIJRestoreArray(b->A,&aa);CHKERRQ(ierr);
      ierr = MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
  } else {
    if (mumps->ICNTL20 == 0) {
      ierr = VecDestroy(&b_seq);CHKERRQ(ierr);
      ierr = VecScatterDestroy(&scat_rhs);CHKERRQ(ierr);
    }
  }
  ierr = VecScatterDestroy(&scat_sol);CHKERRQ(ierr);
  ierr =
PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));CHKERRQ(ierr); 1485 PetscFunctionReturn(0); 1486 } 1487 1488 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X) 1489 { 1490 PetscErrorCode ierr; 1491 PetscBool flg; 1492 Mat B; 1493 1494 PetscFunctionBegin; 1495 ierr = PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 1496 if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix"); 1497 1498 /* Create B=Bt^T that uses Bt's data structure */ 1499 ierr = MatCreateTranspose(Bt,&B);CHKERRQ(ierr); 1500 1501 ierr = MatMatSolve_MUMPS(A,B,X);CHKERRQ(ierr); 1502 ierr = MatDestroy(&B);CHKERRQ(ierr); 1503 PetscFunctionReturn(0); 1504 } 1505 1506 #if !defined(PETSC_USE_COMPLEX) 1507 /* 1508 input: 1509 F: numeric factor 1510 output: 1511 nneg: total number of negative pivots 1512 nzero: total number of zero pivots 1513 npos: (global dimension of F) - nneg - nzero 1514 */ 1515 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos) 1516 { 1517 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1518 PetscErrorCode ierr; 1519 PetscMPIInt size; 1520 1521 PetscFunctionBegin; 1522 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRMPI(ierr); 1523 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1524 if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. 
-mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia",mumps->id.INFOG(13)); 1525 1526 if (nneg) *nneg = mumps->id.INFOG(12); 1527 if (nzero || npos) { 1528 if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1529 if (nzero) *nzero = mumps->id.INFOG(28); 1530 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1531 } 1532 PetscFunctionReturn(0); 1533 } 1534 #endif 1535 1536 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps) 1537 { 1538 PetscErrorCode ierr; 1539 PetscInt i,nreqs; 1540 PetscMUMPSInt *irn,*jcn; 1541 PetscMPIInt count; 1542 PetscInt64 totnnz,remain; 1543 const PetscInt osize=mumps->omp_comm_size; 1544 PetscScalar *val; 1545 1546 PetscFunctionBegin; 1547 if (osize > 1) { 1548 if (reuse == MAT_INITIAL_MATRIX) { 1549 /* master first gathers counts of nonzeros to receive */ 1550 if (mumps->is_omp_master) {ierr = PetscMalloc1(osize,&mumps->recvcount);CHKERRQ(ierr);} 1551 ierr = MPI_Gather(&mumps->nnz,1,MPIU_INT64,mumps->recvcount,1,MPIU_INT64,0/*master*/,mumps->omp_comm);CHKERRMPI(ierr); 1552 1553 /* Then each computes number of send/recvs */ 1554 if (mumps->is_omp_master) { 1555 /* Start from 1 since self communication is not done in MPI */ 1556 nreqs = 0; 1557 for (i=1; i<osize; i++) nreqs += (mumps->recvcount[i]+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX; 1558 } else { 1559 nreqs = (mumps->nnz+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX; 1560 } 1561 ierr = PetscMalloc1(nreqs*3,&mumps->reqs);CHKERRQ(ierr); /* Triple the requests since we send irn, jcn and val seperately */ 1562 1563 /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 1564 MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 1565 might be a prime number > 2^31-1, we have to slice the message. 
Note omp_comm_size 1566 is very small, the current approach should have no extra overhead compared to MPI_Gatherv. 1567 */ 1568 nreqs = 0; /* counter for actual send/recvs */ 1569 if (mumps->is_omp_master) { 1570 for (i=0,totnnz=0; i<osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1571 ierr = PetscMalloc2(totnnz,&irn,totnnz,&jcn);CHKERRQ(ierr); 1572 ierr = PetscMalloc1(totnnz,&val);CHKERRQ(ierr); 1573 1574 /* Self communication */ 1575 ierr = PetscArraycpy(irn,mumps->irn,mumps->nnz);CHKERRQ(ierr); 1576 ierr = PetscArraycpy(jcn,mumps->jcn,mumps->nnz);CHKERRQ(ierr); 1577 ierr = PetscArraycpy(val,mumps->val,mumps->nnz);CHKERRQ(ierr); 1578 1579 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1580 ierr = PetscFree2(mumps->irn,mumps->jcn);CHKERRQ(ierr); 1581 ierr = PetscFree(mumps->val_alloc);CHKERRQ(ierr); 1582 mumps->nnz = totnnz; 1583 mumps->irn = irn; 1584 mumps->jcn = jcn; 1585 mumps->val = mumps->val_alloc = val; 1586 1587 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1588 jcn += mumps->recvcount[0]; 1589 val += mumps->recvcount[0]; 1590 1591 /* Remote communication */ 1592 for (i=1; i<osize; i++) { 1593 count = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX); 1594 remain = mumps->recvcount[i] - count; 1595 while (count>0) { 1596 ierr = MPI_Irecv(irn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr); 1597 ierr = MPI_Irecv(jcn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr); 1598 ierr = MPI_Irecv(val,count,MPIU_SCALAR, i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr); 1599 irn += count; 1600 jcn += count; 1601 val += count; 1602 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1603 remain -= count; 1604 } 1605 } 1606 } else { 1607 irn = mumps->irn; 1608 jcn = mumps->jcn; 1609 val = mumps->val; 1610 count = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX); 1611 remain = mumps->nnz - 
count; 1612 while (count>0) { 1613 ierr = MPI_Isend(irn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr); 1614 ierr = MPI_Isend(jcn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr); 1615 ierr = MPI_Isend(val,count,MPIU_SCALAR, 0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr); 1616 irn += count; 1617 jcn += count; 1618 val += count; 1619 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1620 remain -= count; 1621 } 1622 } 1623 } else { 1624 nreqs = 0; 1625 if (mumps->is_omp_master) { 1626 val = mumps->val + mumps->recvcount[0]; 1627 for (i=1; i<osize; i++) { /* Remote communication only since self data is already in place */ 1628 count = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX); 1629 remain = mumps->recvcount[i] - count; 1630 while (count>0) { 1631 ierr = MPI_Irecv(val,count,MPIU_SCALAR,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr); 1632 val += count; 1633 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1634 remain -= count; 1635 } 1636 } 1637 } else { 1638 val = mumps->val; 1639 count = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX); 1640 remain = mumps->nnz - count; 1641 while (count>0) { 1642 ierr = MPI_Isend(val,count,MPIU_SCALAR,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr); 1643 val += count; 1644 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1645 remain -= count; 1646 } 1647 } 1648 } 1649 ierr = MPI_Waitall(nreqs,mumps->reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 1650 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1651 } 1652 PetscFunctionReturn(0); 1653 } 1654 1655 PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info) 1656 { 1657 Mat_MUMPS *mumps =(Mat_MUMPS*)(F)->data; 1658 PetscErrorCode ierr; 1659 PetscBool isMPIAIJ; 1660 1661 PetscFunctionBegin; 1662 if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) { 1663 if (mumps->id.INFOG(1) == -6) { 
1664 ierr = PetscInfo2(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1665 } 1666 ierr = PetscInfo2(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1667 PetscFunctionReturn(0); 1668 } 1669 1670 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps);CHKERRQ(ierr); 1671 ierr = MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);CHKERRQ(ierr); 1672 1673 /* numerical factorization phase */ 1674 /*-------------------------------*/ 1675 mumps->id.job = JOB_FACTNUMERIC; 1676 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1677 if (!mumps->myid) { 1678 mumps->id.a = (MumpsScalar*)mumps->val; 1679 } 1680 } else { 1681 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1682 } 1683 PetscMUMPS_c(mumps); 1684 if (mumps->id.INFOG(1) < 0) { 1685 if (A->erroriffailure) { 1686 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d",mumps->id.INFOG(1),mumps->id.INFO(2)); 1687 } else { 1688 if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */ 1689 ierr = PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1690 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1691 } else if (mumps->id.INFOG(1) == -13) { 1692 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1693 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1694 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 1695 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray \n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1696 
F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1697 } else { 1698 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1699 F->factorerrortype = MAT_FACTOR_OTHER; 1700 } 1701 } 1702 } 1703 if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d",mumps->id.INFOG(16)); 1704 1705 F->assembled = PETSC_TRUE; 1706 mumps->matstruc = SAME_NONZERO_PATTERN; 1707 if (F->schur) { /* reset Schur status to unfactored */ 1708 #if defined(PETSC_HAVE_CUDA) 1709 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 1710 #endif 1711 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1712 mumps->id.ICNTL(19) = 2; 1713 ierr = MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);CHKERRQ(ierr); 1714 } 1715 ierr = MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);CHKERRQ(ierr); 1716 } 1717 1718 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 1719 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 1720 1721 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 1722 if (mumps->petsc_size > 1) { 1723 PetscInt lsol_loc; 1724 PetscScalar *sol_loc; 1725 1726 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr); 1727 1728 /* distributed solution; Create x_seq=sol_loc for repeated use */ 1729 if (mumps->x_seq) { 1730 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 1731 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 1732 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 1733 } 1734 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1735 ierr = PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);CHKERRQ(ierr); 1736 mumps->id.lsol_loc = lsol_loc; 1737 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1738 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr); 
1739 } 1740 ierr = PetscLogFlops(mumps->id.RINFO(2));CHKERRQ(ierr); 1741 PetscFunctionReturn(0); 1742 } 1743 1744 /* Sets MUMPS options from the options database */ 1745 PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A) 1746 { 1747 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1748 PetscErrorCode ierr; 1749 PetscMUMPSInt icntl=0; 1750 PetscInt info[80],i,ninfo=80; 1751 PetscBool flg=PETSC_FALSE; 1752 1753 PetscFunctionBegin; 1754 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 1755 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr); 1756 if (flg) mumps->id.ICNTL(1) = icntl; 1757 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr); 1758 if (flg) mumps->id.ICNTL(2) = icntl; 1759 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr); 1760 if (flg) mumps->id.ICNTL(3) = icntl; 1761 1762 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr); 1763 if (flg) mumps->id.ICNTL(4) = icntl; 1764 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 1765 1766 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr); 1767 if (flg) mumps->id.ICNTL(6) = icntl; 1768 1769 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis. 
0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr); 1770 if (flg) { 1771 if (icntl == 1 || icntl < 0 || icntl > 7) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 1772 mumps->id.ICNTL(7) = icntl; 1773 } 1774 1775 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr); 1776 /* ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL);CHKERRQ(ierr); handled by MatSolveTranspose_MUMPS() */ 1777 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr); 1778 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr); 1779 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr); 1780 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr); 1781 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr); 1782 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr); 1783 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 1784 ierr = MatDestroy(&F->schur);CHKERRQ(ierr); 1785 ierr = 
MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 1786 } 1787 1788 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 1789 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 1790 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 1791 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 1792 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 1793 In short, we could not use distributed RHS with MPICH until v4.0b1. 1794 */ 1795 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101)) 1796 mumps->ICNTL20 = 0; /* Centralized dense RHS*/ 1797 #else 1798 mumps->ICNTL20 = 10; /* Distributed dense RHS*/ 1799 #endif 1800 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_20","ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides","None",mumps->ICNTL20,&mumps->ICNTL20,&flg);CHKERRQ(ierr); 1801 if (flg && mumps->ICNTL20 != 10 && mumps->ICNTL20 != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. 
Allowed values are 0, 10",(int)mumps->ICNTL20); 1802 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0) 1803 if (flg && mumps->ICNTL20 == 10) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=10 is not supported before MUMPS-5.3.0"); 1804 #endif 1805 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL);CHKERRQ(ierr); we only use distributed solution vector */ 1806 1807 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr); 1808 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr); 1809 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr); 1810 if (mumps->id.ICNTL(24)) { 1811 mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ 1812 } 1813 1814 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr); 1815 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr); 1816 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_27","ICNTL(27): controls the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr); 1817 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) 
ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr); 1818 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr); 1819 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */ 1820 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr); 1821 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr); -- not supported by PETSc API */ 1822 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr); 1823 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr); 1824 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);CHKERRQ(ierr); 1825 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);CHKERRQ(ierr); 1826 1827 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr); 1828 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr); 1829 ierr = 
PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr); 1830 ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr); 1831 ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr); 1832 ierr = PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);CHKERRQ(ierr); 1833 1834 ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL);CHKERRQ(ierr); 1835 1836 ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr); 1837 if (ninfo) { 1838 if (PetscUnlikely(ninfo > 80)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %" PetscInt_FMT " must <= 80",ninfo); 1839 ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr); 1840 mumps->ninfo = ninfo; 1841 for (i=0; i<ninfo; i++) { 1842 if (PetscUnlikely(info[i] < 0 || info[i]>80)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %" PetscInt_FMT " must between 1 and 80",ninfo); 1843 else mumps->info[i] = info[i]; 1844 } 1845 } 1846 1847 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1848 PetscFunctionReturn(0); 1849 } 1850 1851 PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps) 1852 { 1853 PetscErrorCode ierr; 1854 PetscInt nthreads=0; 1855 1856 PetscFunctionBegin; 1857 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1858 ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRMPI(ierr); 1859 ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRMPI(ierr);/* "if (!myid)" still works even if mumps_comm is different */ 1860 1861 ierr = 
PetscOptionsHasName(NULL,((PetscObject)A)->prefix,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr); 1862 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1863 ierr = PetscOptionsGetInt(NULL,((PetscObject)A)->prefix,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr); 1864 if (mumps->use_petsc_omp_support) { 1865 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1866 ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr); 1867 ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr); 1868 #else 1869 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",((PetscObject)A)->prefix?((PetscObject)A)->prefix:""); 1870 #endif 1871 } else { 1872 mumps->omp_comm = PETSC_COMM_SELF; 1873 mumps->mumps_comm = mumps->petsc_comm; 1874 mumps->is_omp_master = PETSC_TRUE; 1875 } 1876 ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRMPI(ierr); 1877 mumps->reqs = NULL; 1878 mumps->tag = 0; 1879 1880 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. 
*/ 1881 if (mumps->mumps_comm != MPI_COMM_NULL) { 1882 ierr = PetscCommGetComm(PetscObjectComm((PetscObject)A),&mumps->mumps_comm);CHKERRQ(ierr); 1883 } 1884 1885 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 1886 mumps->id.job = JOB_INIT; 1887 mumps->id.par = 1; /* host participates factorizaton and solve */ 1888 mumps->id.sym = mumps->sym; 1889 1890 PetscMUMPS_c(mumps); 1891 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in PetscInitializeMUMPS: INFOG(1)=%d",mumps->id.INFOG(1)); 1892 1893 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 1894 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 1895 */ 1896 ierr = MPI_Bcast(mumps->id.icntl,40,MPI_INT, 0,mumps->omp_comm);CHKERRMPI(ierr); 1897 ierr = MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);CHKERRMPI(ierr); 1898 1899 mumps->scat_rhs = NULL; 1900 mumps->scat_sol = NULL; 1901 1902 /* set PETSc-MUMPS default options - override MUMPS default */ 1903 mumps->id.ICNTL(3) = 0; 1904 mumps->id.ICNTL(4) = 0; 1905 if (mumps->petsc_size == 1) { 1906 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 1907 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 1908 } else { 1909 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 1910 mumps->id.ICNTL(21) = 1; /* distributed solution */ 1911 } 1912 1913 /* schur */ 1914 mumps->id.size_schur = 0; 1915 mumps->id.listvar_schur = NULL; 1916 mumps->id.schur = NULL; 1917 mumps->sizeredrhs = 0; 1918 mumps->schur_sol = NULL; 1919 mumps->schur_sizesol = 0; 1920 PetscFunctionReturn(0); 1921 } 1922 1923 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps) 1924 { 1925 PetscErrorCode ierr; 1926 1927 PetscFunctionBegin; 1928 if (mumps->id.INFOG(1) < 0) { 1929 if (A->erroriffailure) { 1930 
SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d",mumps->id.INFOG(1)); 1931 } else { 1932 if (mumps->id.INFOG(1) == -6) { 1933 ierr = PetscInfo2(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1934 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 1935 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 1936 ierr = PetscInfo2(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1937 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1938 } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) { 1939 ierr = PetscInfo(F,"Empty matrix\n");CHKERRQ(ierr); 1940 } else { 1941 ierr = PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1942 F->factorerrortype = MAT_FACTOR_OTHER; 1943 } 1944 } 1945 } 1946 PetscFunctionReturn(0); 1947 } 1948 1949 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1950 { 1951 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1952 PetscErrorCode ierr; 1953 Vec b; 1954 const PetscInt M = A->rmap->N; 1955 1956 PetscFunctionBegin; 1957 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1958 1959 /* Set MUMPS options from the options database */ 1960 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1961 1962 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr); 1963 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1964 1965 /* analysis phase */ 1966 /*----------------*/ 1967 mumps->id.job = JOB_FACTSYMBOLIC; 1968 mumps->id.n = M; 1969 switch (mumps->id.ICNTL(18)) { 1970 case 0: /* centralized assembled matrix input */ 1971 if (!mumps->myid) { 1972 mumps->id.nnz = mumps->nnz; 1973 mumps->id.irn = mumps->irn; 1974 mumps->id.jcn = mumps->jcn; 1975 if (mumps->id.ICNTL(6)>1) mumps->id.a = 
(MumpsScalar*)mumps->val; 1976 if (r) { 1977 mumps->id.ICNTL(7) = 1; 1978 if (!mumps->myid) { 1979 const PetscInt *idx; 1980 PetscInt i; 1981 1982 ierr = PetscMalloc1(M,&mumps->id.perm_in);CHKERRQ(ierr); 1983 ierr = ISGetIndices(r,&idx);CHKERRQ(ierr); 1984 for (i=0; i<M; i++) {ierr = PetscMUMPSIntCast(idx[i]+1,&(mumps->id.perm_in[i]));CHKERRQ(ierr);} /* perm_in[]: start from 1, not 0! */ 1985 ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr); 1986 } 1987 } 1988 } 1989 break; 1990 case 3: /* distributed assembled matrix input (size>1) */ 1991 mumps->id.nnz_loc = mumps->nnz; 1992 mumps->id.irn_loc = mumps->irn; 1993 mumps->id.jcn_loc = mumps->jcn; 1994 if (mumps->id.ICNTL(6)>1) mumps->id.a_loc = (MumpsScalar*)mumps->val; 1995 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1996 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1997 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1998 ierr = VecDestroy(&b);CHKERRQ(ierr); 1999 } 2000 break; 2001 } 2002 PetscMUMPS_c(mumps); 2003 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 2004 2005 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2006 F->ops->solve = MatSolve_MUMPS; 2007 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2008 F->ops->matsolve = MatMatSolve_MUMPS; 2009 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2010 PetscFunctionReturn(0); 2011 } 2012 2013 /* Note the Petsc r and c permutations are ignored */ 2014 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 2015 { 2016 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 2017 PetscErrorCode ierr; 2018 Vec b; 2019 const PetscInt M = A->rmap->N; 2020 2021 PetscFunctionBegin; 2022 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 2023 2024 /* Set MUMPS options from the options database */ 2025 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 2026 2027 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, 
mumps);CHKERRQ(ierr); 2028 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 2029 2030 /* analysis phase */ 2031 /*----------------*/ 2032 mumps->id.job = JOB_FACTSYMBOLIC; 2033 mumps->id.n = M; 2034 switch (mumps->id.ICNTL(18)) { 2035 case 0: /* centralized assembled matrix input */ 2036 if (!mumps->myid) { 2037 mumps->id.nnz = mumps->nnz; 2038 mumps->id.irn = mumps->irn; 2039 mumps->id.jcn = mumps->jcn; 2040 if (mumps->id.ICNTL(6)>1) { 2041 mumps->id.a = (MumpsScalar*)mumps->val; 2042 } 2043 } 2044 break; 2045 case 3: /* distributed assembled matrix input (size>1) */ 2046 mumps->id.nnz_loc = mumps->nnz; 2047 mumps->id.irn_loc = mumps->irn; 2048 mumps->id.jcn_loc = mumps->jcn; 2049 if (mumps->id.ICNTL(6)>1) { 2050 mumps->id.a_loc = (MumpsScalar*)mumps->val; 2051 } 2052 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2053 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 2054 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 2055 ierr = VecDestroy(&b);CHKERRQ(ierr); 2056 } 2057 break; 2058 } 2059 PetscMUMPS_c(mumps); 2060 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 2061 2062 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2063 F->ops->solve = MatSolve_MUMPS; 2064 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2065 PetscFunctionReturn(0); 2066 } 2067 2068 /* Note the Petsc r permutation and factor info are ignored */ 2069 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info) 2070 { 2071 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 2072 PetscErrorCode ierr; 2073 Vec b; 2074 const PetscInt M = A->rmap->N; 2075 2076 PetscFunctionBegin; 2077 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 2078 2079 /* Set MUMPS options from the options database */ 2080 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 2081 2082 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr); 
2083 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 2084 2085 /* analysis phase */ 2086 /*----------------*/ 2087 mumps->id.job = JOB_FACTSYMBOLIC; 2088 mumps->id.n = M; 2089 switch (mumps->id.ICNTL(18)) { 2090 case 0: /* centralized assembled matrix input */ 2091 if (!mumps->myid) { 2092 mumps->id.nnz = mumps->nnz; 2093 mumps->id.irn = mumps->irn; 2094 mumps->id.jcn = mumps->jcn; 2095 if (mumps->id.ICNTL(6)>1) { 2096 mumps->id.a = (MumpsScalar*)mumps->val; 2097 } 2098 } 2099 break; 2100 case 3: /* distributed assembled matrix input (size>1) */ 2101 mumps->id.nnz_loc = mumps->nnz; 2102 mumps->id.irn_loc = mumps->irn; 2103 mumps->id.jcn_loc = mumps->jcn; 2104 if (mumps->id.ICNTL(6)>1) { 2105 mumps->id.a_loc = (MumpsScalar*)mumps->val; 2106 } 2107 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2108 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 2109 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 2110 ierr = VecDestroy(&b);CHKERRQ(ierr); 2111 } 2112 break; 2113 } 2114 PetscMUMPS_c(mumps); 2115 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 2116 2117 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2118 F->ops->solve = MatSolve_MUMPS; 2119 F->ops->solvetranspose = MatSolve_MUMPS; 2120 F->ops->matsolve = MatMatSolve_MUMPS; 2121 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2122 #if defined(PETSC_USE_COMPLEX) 2123 F->ops->getinertia = NULL; 2124 #else 2125 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2126 #endif 2127 PetscFunctionReturn(0); 2128 } 2129 2130 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) 2131 { 2132 PetscErrorCode ierr; 2133 PetscBool iascii; 2134 PetscViewerFormat format; 2135 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 2136 2137 PetscFunctionBegin; 2138 /* check if matrix is mumps type */ 2139 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0); 2140 2141 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 2142 if (iascii) { 2143 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 2144 if (format == PETSC_VIEWER_ASCII_INFO) { 2145 ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr); 2146 ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d\n",mumps->id.sym);CHKERRQ(ierr); 2147 ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d\n",mumps->id.par);CHKERRQ(ierr); 2148 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d\n",mumps->id.ICNTL(1));CHKERRQ(ierr); 2149 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d\n",mumps->id.ICNTL(2));CHKERRQ(ierr); 2150 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d\n",mumps->id.ICNTL(3));CHKERRQ(ierr); 2151 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d\n",mumps->id.ICNTL(4));CHKERRQ(ierr); 2152 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d\n",mumps->id.ICNTL(5));CHKERRQ(ierr); 2153 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d\n",mumps->id.ICNTL(6));CHKERRQ(ierr); 2154 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d\n",mumps->id.ICNTL(7));CHKERRQ(ierr); 2155 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d\n",mumps->id.ICNTL(8));CHKERRQ(ierr); 2156 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d\n",mumps->id.ICNTL(10));CHKERRQ(ierr); 2157 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d\n",mumps->id.ICNTL(11));CHKERRQ(ierr); 2158 if (mumps->id.ICNTL(11)>0) { 2159 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr); 2160 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr); 2161 ierr = PetscViewerASCIIPrintf(viewer," 
RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6));CHKERRQ(ierr); 2162 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr); 2163 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr); 2164 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr); 2165 } 2166 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d\n",mumps->id.ICNTL(12));CHKERRQ(ierr); 2167 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (sequential factorization of the root node): %d\n",mumps->id.ICNTL(13));CHKERRQ(ierr); 2168 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d\n",mumps->id.ICNTL(14));CHKERRQ(ierr); 2169 /* ICNTL(15-17) not used */ 2170 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d\n",mumps->id.ICNTL(18));CHKERRQ(ierr); 2171 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d\n",mumps->id.ICNTL(19));CHKERRQ(ierr); 2172 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(20) (RHS sparse pattern): %d\n",mumps->id.ICNTL(20));CHKERRQ(ierr); 2173 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d\n",mumps->id.ICNTL(21));CHKERRQ(ierr); 2174 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d\n",mumps->id.ICNTL(22));CHKERRQ(ierr); 2175 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory can be allocated locally):%d\n",mumps->id.ICNTL(23));CHKERRQ(ierr); 2176 2177 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d\n",mumps->id.ICNTL(24));CHKERRQ(ierr); 2178 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d\n",mumps->id.ICNTL(25));CHKERRQ(ierr); 2179 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(26) 
(Schur options for RHS or solution): %d\n",mumps->id.ICNTL(26));CHKERRQ(ierr); 2180 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(27) (blocking size for multiple RHS): %d\n",mumps->id.ICNTL(27));CHKERRQ(ierr); 2181 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d\n",mumps->id.ICNTL(28));CHKERRQ(ierr); 2182 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d\n",mumps->id.ICNTL(29));CHKERRQ(ierr); 2183 2184 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d\n",mumps->id.ICNTL(30));CHKERRQ(ierr); 2185 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(31) (factors is discarded in the solve phase): %d\n",mumps->id.ICNTL(31));CHKERRQ(ierr); 2186 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d\n",mumps->id.ICNTL(33));CHKERRQ(ierr); 2187 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d\n",mumps->id.ICNTL(35));CHKERRQ(ierr); 2188 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(36) (choice of BLR factorization variant): %d\n",mumps->id.ICNTL(36));CHKERRQ(ierr); 2189 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(38) (estimated compression rate of LU factors): %d\n",mumps->id.ICNTL(38));CHKERRQ(ierr); 2190 2191 ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1));CHKERRQ(ierr); 2192 ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr); 2193 ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3));CHKERRQ(ierr); 2194 ierr = PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4));CHKERRQ(ierr); 2195 ierr = PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5));CHKERRQ(ierr); 2196 ierr = PetscViewerASCIIPrintf(viewer," CNTL(7) (dropping parameter for BLR): %g 
\n",mumps->id.CNTL(7));CHKERRQ(ierr); 2197 2198 /* information local to each processor */ 2199 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr); 2200 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 2201 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr); 2202 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2203 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr); 2204 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr); 2205 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2206 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr); 2207 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr); 2208 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2209 2210 ierr = PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr); 2211 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr); 2212 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2213 2214 ierr = PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr); 2215 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr); 2216 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2217 2218 ierr = PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr); 2219 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr); 2220 ierr = 
PetscViewerFlush(viewer);CHKERRQ(ierr); 2221 2222 if (mumps->ninfo && mumps->ninfo <= 80) { 2223 PetscInt i; 2224 for (i=0; i<mumps->ninfo; i++) { 2225 ierr = PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "): \n",mumps->info[i]);CHKERRQ(ierr); 2226 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(mumps->info[i]));CHKERRQ(ierr); 2227 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2228 } 2229 } 2230 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 2231 2232 if (!mumps->myid) { /* information from the host */ 2233 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr); 2234 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr); 2235 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr); 2236 ierr = PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr); 2237 2238 ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(3));CHKERRQ(ierr); 2239 ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(4));CHKERRQ(ierr); 2240 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d\n",mumps->id.INFOG(5));CHKERRQ(ierr); 2241 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d\n",mumps->id.INFOG(6));CHKERRQ(ierr); 2242 ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively used after analysis): 
%d\n",mumps->id.INFOG(7));CHKERRQ(ierr); 2243 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n",mumps->id.INFOG(8));CHKERRQ(ierr); 2244 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n",mumps->id.INFOG(9));CHKERRQ(ierr); 2245 ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d\n",mumps->id.INFOG(10));CHKERRQ(ierr); 2246 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d\n",mumps->id.INFOG(11));CHKERRQ(ierr); 2247 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d\n",mumps->id.INFOG(12));CHKERRQ(ierr); 2248 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d\n",mumps->id.INFOG(13));CHKERRQ(ierr); 2249 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d\n",mumps->id.INFOG(14));CHKERRQ(ierr); 2250 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d\n",mumps->id.INFOG(15));CHKERRQ(ierr); 2251 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n",mumps->id.INFOG(16));CHKERRQ(ierr); 2252 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n",mumps->id.INFOG(17));CHKERRQ(ierr); 2253 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n",mumps->id.INFOG(18));CHKERRQ(ierr); 2254 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated 
during factorization: sum over all processors): %d\n",mumps->id.INFOG(19));CHKERRQ(ierr); 2255 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d\n",mumps->id.INFOG(20));CHKERRQ(ierr); 2256 ierr = PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n",mumps->id.INFOG(21));CHKERRQ(ierr); 2257 ierr = PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n",mumps->id.INFOG(22));CHKERRQ(ierr); 2258 ierr = PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n",mumps->id.INFOG(23));CHKERRQ(ierr); 2259 ierr = PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n",mumps->id.INFOG(24));CHKERRQ(ierr); 2260 ierr = PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n",mumps->id.INFOG(25));CHKERRQ(ierr); 2261 ierr = PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));CHKERRQ(ierr); 2262 ierr = PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));CHKERRQ(ierr); 2263 ierr = PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));CHKERRQ(ierr); 2264 ierr = PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));CHKERRQ(ierr); 2265 ierr = PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));CHKERRQ(ierr); 2266 ierr = PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): 
%d\n",mumps->id.INFOG(34));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(36));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n",mumps->id.INFOG(37));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(38));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n",mumps->id.INFOG(39));CHKERRQ(ierr);
    }
  }
}
PetscFunctionReturn(0);
}

/* MatGetInfo_MUMPS - fill a MatInfo with the statistics MUMPS tracks for the factorization.
   Only INFOG(20) (estimated number of entries in the factors, see the viewer output above)
   is available; it is reported as both nz_allocated and nz_used, and all other fields
   are set to fixed defaults since MUMPS does not track them. */
PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)A->data;

  PetscFunctionBegin;
  info->block_size        = 1.0;
  info->nz_allocated      = mumps->id.INFOG(20);
  info->nz_used           = mumps->id.INFOG(20);
  info->nz_unneeded       = 0.0;
  info->assemblies        = 0.0;
  info->mallocs           = 0.0;
  info->memory            = 0.0;
  info->fill_ratio_given  = 0;
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------------------------*/
/* MatFactorSetSchurIS_MUMPS - register with MUMPS the set of variables (given by IS is) that
   defines the Schur complement, and allocate the sequential dense matrix F->schur into which
   MUMPS will write it.  In parallel, only the host (rank 0) may pass a non-empty IS since
   MUMPS returns the Schur complement centralized on the host. */
PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
{
  Mat_MUMPS         *mumps =(Mat_MUMPS*)F->data;
  const PetscScalar *arr;
  const PetscInt    *idxs;
  PetscInt          size,i;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr);
  if (mumps->petsc_size > 1) {
    PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */

    ls   = mumps->myid ? (size ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
    ierr = MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);CHKERRMPI(ierr);
    if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc");
  }

  /* Schur complement matrix: MUMPS writes directly into the dense storage of F->schur,
     whose array is aliased into mumps->id.schur below */
  ierr = MatDestroy(&F->schur);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(F->schur,&arr);CHKERRQ(ierr);
  mumps->id.schur      = (MumpsScalar*)arr;
  mumps->id.size_schur = size;
  mumps->id.schur_lld  = size;
  ierr = MatDenseRestoreArrayRead(F->schur,&arr);CHKERRQ(ierr);
  if (mumps->sym == 1) { /* sym == 1 is the MUMPS SPD case */
    ierr = MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);
  }

  /* MUMPS expects Fortran style indices */
  ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&mumps->id.listvar_schur);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr);
  for (i=0; i<size; i++) {ierr = PetscMUMPSIntCast(idxs[i]+1,&(mumps->id.listvar_schur[i]));CHKERRQ(ierr);}
  ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr);
  if (mumps->petsc_size > 1) {
    mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */
  } else {
    if (F->factortype == MAT_FACTOR_LU) {
      mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
    } else {
      mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
    }
  }
  /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
  mumps->id.ICNTL(26) = -1;
PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------------------------*/
/* MatFactorCreateSchurComplement_MUMPS - copy the Schur complement that MUMPS stored in
   mumps->id.schur into a new SEQDENSE matrix *S (caller owns it).  The storage layout of
   id.schur depends on ICNTL(19) as set in MatFactorSetSchurIS_MUMPS: 1 = by rows
   (full for unsymmetric, lower triangular for symmetric), 2 = lower triangular by columns,
   3 = full matrix; the branches below expand each layout into the dense array. */
PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S)
{
  Mat            St;
  Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
  PetscScalar    *array;
#if defined(PETSC_USE_COMPLEX)
  PetscScalar    im = PetscSqrtScalar((PetscScalar)-1.0); /* imaginary unit, used to rebuild PetscScalar from MUMPS (r,i) pairs */
#endif
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
  ierr = MatCreate(PETSC_COMM_SELF,&St);CHKERRQ(ierr);
  ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);CHKERRQ(ierr);
  ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(St);CHKERRQ(ierr);
  ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr);
  if (!mumps->sym) { /* MUMPS always returns a full matrix */
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
      PetscInt i,j,N=mumps->id.size_schur;
      for (i=0;i<N;i++) {
        for (j=0;j<N;j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i*N+j];
#else
          PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
#endif
          array[j*N+i] = val; /* transpose row-major into the column-major dense array */
        }
      }
    } else { /* stored by columns */
      ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr);
    }
  } else { /* either full or lower-triangular (not packed) */
    if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
      PetscInt i,j,N=mumps->id.size_schur;
      for (i=0;i<N;i++) {
        for (j=i;j<N;j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i*N+j];
#else
          PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
#endif
          array[i*N+j] = val; /* mirror into both triangles of the symmetric result */
          array[j*N+i] = val;
        }
      }
    } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
      ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr);
    } else { /* ICNTL(19) == 1 lower triangular stored by rows */
      PetscInt i,j,N=mumps->id.size_schur;
      for (i=0;i<N;i++) {
        for (j=0;j<i+1;j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i*N+j];
#else
          PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
#endif
          array[i*N+j] = val;
          array[j*N+i] = val;
        }
      }
    }
  }
  ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr);
  *S   = St;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------------------------*/
/* Plugin implementation behind MatMumpsSetIcntl(); PetscMUMPSIntCast errors out (in debug)
   if ival does not fit in MUMPS's 32-bit integer type. */
PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival)
{
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  ierr = PetscMUMPSIntCast(ival,&mumps->id.ICNTL(icntl));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Plugin implementation behind MatMumpsGetIcntl(). */
PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  *ival = mumps->id.ICNTL(icntl);
  PetscFunctionReturn(0);
}

/*@
  MatMumpsSetIcntl - Set MUMPS parameter ICNTL()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
.  icntl - index of MUMPS parameter array ICNTL()
-  ival - value of MUMPS ICNTL(icntl)

  Options Database:
.   -mat_mumps_icntl_<icntl> <ival>

   Level: beginner

   References:
.
MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidLogicalCollectiveInt(F,icntl,2);
  PetscValidLogicalCollectiveInt(F,ival,3);
  /* TryMethod (not UseMethod): silently a no-op if F is not a MUMPS factor */
  ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
  MatMumpsGetIcntl - Get MUMPS parameter ICNTL()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  icntl - index of MUMPS parameter array ICNTL()

  Output Parameter:
.  ival - value of MUMPS ICNTL(icntl)

   Level: beginner

   References:
.     MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidLogicalCollectiveInt(F,icntl,2);
  PetscValidIntPointer(ival,3);
  ierr = PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------------------------*/
/* Plugin implementation behind MatMumpsSetCntl(): real-valued MUMPS controls (pivoting
   thresholds etc.); no range check on icntl is performed here. */
PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  mumps->id.CNTL(icntl) = val;
  PetscFunctionReturn(0);
}

/* Plugin implementation behind MatMumpsGetCntl(). */
PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  *val = mumps->id.CNTL(icntl);
  PetscFunctionReturn(0);
}

/*@
  MatMumpsSetCntl - Set MUMPS parameter CNTL()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
.  icntl - index of MUMPS parameter array CNTL()
-  val - value of MUMPS CNTL(icntl)

  Options Database:
.   -mat_mumps_cntl_<icntl> <val>

   Level: beginner

   References:
.     MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidLogicalCollectiveInt(F,icntl,2);
  PetscValidLogicalCollectiveReal(F,val,3);
  ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
  MatMumpsGetCntl - Get MUMPS parameter CNTL()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  icntl - index of MUMPS parameter array CNTL()

  Output Parameter:
.  val - value of MUMPS CNTL(icntl)

   Level: beginner

   References:
.
MUMPS Users' Guide 2570 2571 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2572 @*/ 2573 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val) 2574 { 2575 PetscErrorCode ierr; 2576 2577 PetscFunctionBegin; 2578 PetscValidType(F,1); 2579 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2580 PetscValidLogicalCollectiveInt(F,icntl,2); 2581 PetscValidRealPointer(val,3); 2582 ierr = PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2583 PetscFunctionReturn(0); 2584 } 2585 2586 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info) 2587 { 2588 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2589 2590 PetscFunctionBegin; 2591 *info = mumps->id.INFO(icntl); 2592 PetscFunctionReturn(0); 2593 } 2594 2595 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog) 2596 { 2597 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2598 2599 PetscFunctionBegin; 2600 *infog = mumps->id.INFOG(icntl); 2601 PetscFunctionReturn(0); 2602 } 2603 2604 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo) 2605 { 2606 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2607 2608 PetscFunctionBegin; 2609 *rinfo = mumps->id.RINFO(icntl); 2610 PetscFunctionReturn(0); 2611 } 2612 2613 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog) 2614 { 2615 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2616 2617 PetscFunctionBegin; 2618 *rinfog = mumps->id.RINFOG(icntl); 2619 PetscFunctionReturn(0); 2620 } 2621 2622 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS) 2623 { 2624 PetscErrorCode ierr; 2625 Mat Bt = NULL,Btseq = NULL; 2626 PetscBool flg; 2627 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2628 PetscScalar *aa; 2629 PetscInt spnr,*ia,*ja,M,nrhs; 2630 2631 PetscFunctionBegin; 2632 PetscValidPointer(spRHS,2); 2633 ierr 
= PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);CHKERRQ(ierr); 2634 if (flg) { 2635 ierr = MatTransposeGetMat(spRHS,&Bt);CHKERRQ(ierr); 2636 } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be type MATTRANSPOSEMAT matrix"); 2637 2638 ierr = MatMumpsSetIcntl(F,30,1);CHKERRQ(ierr); 2639 2640 if (mumps->petsc_size > 1) { 2641 Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data; 2642 Btseq = b->A; 2643 } else { 2644 Btseq = Bt; 2645 } 2646 2647 ierr = MatGetSize(spRHS,&M,&nrhs);CHKERRQ(ierr); 2648 mumps->id.nrhs = nrhs; 2649 mumps->id.lrhs = M; 2650 mumps->id.rhs = NULL; 2651 2652 if (!mumps->myid) { 2653 ierr = MatSeqAIJGetArray(Btseq,&aa);CHKERRQ(ierr); 2654 ierr = MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2655 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2656 ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr); 2657 mumps->id.rhs_sparse = (MumpsScalar*)aa; 2658 } else { 2659 mumps->id.irhs_ptr = NULL; 2660 mumps->id.irhs_sparse = NULL; 2661 mumps->id.nz_rhs = 0; 2662 mumps->id.rhs_sparse = NULL; 2663 } 2664 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2665 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2666 2667 /* solve phase */ 2668 /*-------------*/ 2669 mumps->id.job = JOB_SOLVE; 2670 PetscMUMPS_c(mumps); 2671 if (mumps->id.INFOG(1) < 0) 2672 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d",mumps->id.INFOG(1),mumps->id.INFO(2)); 2673 2674 if (!mumps->myid) { 2675 ierr = MatSeqAIJRestoreArray(Btseq,&aa);CHKERRQ(ierr); 2676 ierr = MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2677 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2678 } 2679 
PetscFunctionReturn(0); 2680 } 2681 2682 /*@ 2683 MatMumpsGetInverse - Get user-specified set of entries in inverse of A 2684 2685 Logically Collective on Mat 2686 2687 Input Parameters: 2688 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2689 - spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0] 2690 2691 Output Parameter: 2692 . spRHS - requested entries of inverse of A 2693 2694 Level: beginner 2695 2696 References: 2697 . MUMPS Users' Guide 2698 2699 .seealso: MatGetFactor(), MatCreateTranspose() 2700 @*/ 2701 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS) 2702 { 2703 PetscErrorCode ierr; 2704 2705 PetscFunctionBegin; 2706 PetscValidType(F,1); 2707 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2708 ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr); 2709 PetscFunctionReturn(0); 2710 } 2711 2712 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST) 2713 { 2714 PetscErrorCode ierr; 2715 Mat spRHS; 2716 2717 PetscFunctionBegin; 2718 ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr); 2719 ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr); 2720 ierr = MatDestroy(&spRHS);CHKERRQ(ierr); 2721 PetscFunctionReturn(0); 2722 } 2723 2724 /*@ 2725 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T 2726 2727 Logically Collective on Mat 2728 2729 Input Parameters: 2730 + F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface 2731 - spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0] 2732 2733 Output Parameter: 2734 . spRHST - requested entries of inverse of A^T 2735 2736 Level: beginner 2737 2738 References: 2739 . 
MUMPS Users' Guide

.seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse()
@*/
PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST)
{
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  ierr = PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix");

  ierr = PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
  MatMumpsGetInfo - Get MUMPS parameter INFO()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  icntl - index of MUMPS parameter array INFO()

  Output Parameter:
.  ival - value of MUMPS INFO(icntl)

   Level: beginner

   References:
.     MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidIntPointer(ival,3);
  ierr = PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
  MatMumpsGetInfog - Get MUMPS parameter INFOG()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  icntl - index of MUMPS parameter array INFOG()

  Output Parameter:
.  ival - value of MUMPS INFOG(icntl)

   Level: beginner

   References:
.     MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidIntPointer(ival,3);
  ierr = PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
  MatMumpsGetRinfo - Get MUMPS parameter RINFO()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  icntl - index of MUMPS parameter array RINFO()

  Output Parameter:
.  val - value of MUMPS RINFO(icntl)

   Level: beginner

   References:
.     MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidRealPointer(val,3);
  ierr = PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
  MatMumpsGetRinfog - Get MUMPS parameter RINFOG()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  icntl - index of MUMPS parameter array RINFOG()

  Output Parameter:
.  val - value of MUMPS RINFOG(icntl)

   Level: beginner

   References:
.      MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo()
@*/
PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidRealPointer(val,3);
  ierr = PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
  MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
  distributed and sequential matrices via the external package MUMPS.
2885 2886 Works with MATAIJ and MATSBAIJ matrices 2887 2888 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 2889 2890 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below. 2891 2892 Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver 2893 2894 Options Database Keys: 2895 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 2896 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 2897 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 2898 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 2899 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 2900 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 2901 Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 2902 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 2903 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 2904 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 2905 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 2906 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 2907 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 2908 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 2909 . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS 2910 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 2911 . 
-mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can be allocated per processor
.  -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
.  -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis
.  -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix
.  -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering
2931 2932 Level: beginner 2933 2934 Notes: 2935 MUMPS Cholesky does not handle (complex) Hermitian matrices http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf so using it will error if the matrix is Hermitian. 2936 2937 When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling 2938 $ KSPGetPC(ksp,&pc); 2939 $ PCFactorGetMatrix(pc,&mat); 2940 $ MatMumpsGetInfo(mat,....); 2941 $ MatMumpsGetInfog(mat,....); etc. 2942 Or you can run with -ksp_error_if_not_converged and the program will be stopped and the information printed in the error message. 2943 2944 Using MUMPS with 64-bit integers 2945 MUMPS provides 64-bit integer support in two build modes: 2946 full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and 2947 requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI). 2948 2949 selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices, 2950 MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and 2951 columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit 2952 integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS. 2953 2954 With --download-mumps=1, PETSc always build MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc. 2955 2956 Two modes to run MUMPS/PETSc with OpenMP 2957 $ Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. 
For example, if you want to have 16 OpenMP 2958 $ threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". 2959 2960 $ -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example, 2961 $ if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 2962 2963 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 2964 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc 2965 (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 2966 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 2967 (PETSc will automatically try to utilized a threaded BLAS if --with-openmp is provided). 2968 2969 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 2970 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 2971 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 2972 are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set 2973 by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs. 2974 In a multi-socket compute node, MPI rank mapping is an issue. 
Still use the above example and suppose your compute node has two sockets, 2975 if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind 2976 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half 2977 cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 2978 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 2979 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 2980 examine the mapping result. 2981 2982 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts, 2983 for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc 2984 calls omp_set_num_threads(m) internally before calling MUMPS. 2985 2986 References: 2987 + 1. - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 2988 - 2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017. 
.seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCFactorGetMatrix()

M*/

/* Report MATSOLVERMUMPS as the solver type of a MUMPS factor matrix */
static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERMUMPS;
  PetscFunctionReturn(0);
}

/* MatGetFactor for Seq and MPI AIJ matrices */
static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqAIJ;
  PetscMPIInt    size;

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  /* reject complex Hermitian (non-symmetric) matrices for Cholesky: not supported by this interface */
  if (A->hermitian && !A->symmetric && ftype == MAT_FACTOR_CHOLESKY) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
#endif
  /* Create the factorization matrix */
  ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  /* Compose the MUMPS-specific methods that the public MatMumpsXXX()/MatFactorXXX() routines look up by name */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
    ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]);CHKERRQ(ierr);
    mumps->sym = 0;   /* unsymmetric factorization */
  } else {
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
    B->factortype                  = MAT_FACTOR_CHOLESKY;
    /* Cholesky on AIJ input: convert the triples as if the matrix were SBAIJ (upper triangle) */
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
    ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY]);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    mumps->sym = 2;   /* complex builds always use the general symmetric setting */
#else
    if (A->spd_set && A->spd) mumps->sym = 1;   /* matrix marked SPD */
    else mumps->sym = 2;                        /* general symmetric */
#endif
  }

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}

/* MatGetFactor for Seq and MPI SBAIJ matrices (Cholesky only, by construction) */
static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqSBAIJ;
  PetscMPIInt    size;

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  /* reject complex Hermitian (non-symmetric) matrices: not supported by this interface */
  if (A->hermitian && !A->symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
#endif
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr);
  if (isSeqSBAIJ) {
    mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
  } else {
    mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
  }

  B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
  B->ops->view                   = MatView_MUMPS;
  B->ops->getinfo                = MatGetInfo_MUMPS;

  /* Compose the MUMPS-specific methods that the public MatMumpsXXX()/MatFactorXXX() routines look up by name */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  B->factortype = MAT_FACTOR_CHOLESKY;
#if defined(PETSC_USE_COMPLEX)
  mumps->sym = 2;   /* complex builds always use the general symmetric setting */
#else
  if (A->spd_set && A->spd) mumps->sym = 1;   /* matrix marked SPD */
  else mumps->sym = 2;                        /* general symmetric */
#endif

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }
  ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY]);CHKERRQ(ierr);
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}

/* MatGetFactor for Seq and MPI BAIJ matrices; only LU is supported (Cholesky errors out below) */
static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqBAIJ;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
    mumps->sym = 0;   /* unsymmetric factorization */
    ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  /* Compose the MUMPS-specific methods that the public MatMumpsXXX()/MatFactorXXX() routines look up by name */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}

/* MatGetFactor for Seq and MPI SELL matrices; currently only sequential SELL with LU is implemented */
static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqSELL;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  /* Compose the MUMPS-specific methods; note GetInverse/GetInverseTranspose are not composed for SELL */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);

  if (ftype == MAT_FACTOR_LU) {
    /* SELL is converted to AIJ-style triples, so the AIJ symbolic LU is reused */
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
    else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
    mumps->sym = 0;   /* unsymmetric factorization */
    ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}

/* Register the MUMPS factorization constructors with PETSc's solver registry for each supported matrix type */
PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}