1 2 /* 3 Provides an interface to the MUMPS sparse solver 4 */ 5 #include <petscpkg_version.h> 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 8 #include <../src/mat/impls/sell/mpi/mpisell.h> 9 10 EXTERN_C_BEGIN 11 #if defined(PETSC_USE_COMPLEX) 12 #if defined(PETSC_USE_REAL_SINGLE) 13 #include <cmumps_c.h> 14 #else 15 #include <zmumps_c.h> 16 #endif 17 #else 18 #if defined(PETSC_USE_REAL_SINGLE) 19 #include <smumps_c.h> 20 #else 21 #include <dmumps_c.h> 22 #endif 23 #endif 24 EXTERN_C_END 25 #define JOB_INIT -1 26 #define JOB_FACTSYMBOLIC 1 27 #define JOB_FACTNUMERIC 2 28 #define JOB_SOLVE 3 29 #define JOB_END -2 30 31 /* calls to MUMPS */ 32 #if defined(PETSC_USE_COMPLEX) 33 #if defined(PETSC_USE_REAL_SINGLE) 34 #define MUMPS_c cmumps_c 35 #else 36 #define MUMPS_c zmumps_c 37 #endif 38 #else 39 #if defined(PETSC_USE_REAL_SINGLE) 40 #define MUMPS_c smumps_c 41 #else 42 #define MUMPS_c dmumps_c 43 #endif 44 #endif 45 46 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for 47 number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the 48 naming convention in PetscMPIInt, PetscBLASInt etc. 49 */ 50 typedef MUMPS_INT PetscMUMPSInt; 51 52 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0) 53 #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */ 54 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 55 #endif 56 #else 57 #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */ 58 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 59 #endif 60 #endif 61 62 #define MPIU_MUMPSINT MPI_INT 63 #define PETSC_MUMPS_INT_MAX 2147483647 64 #define PETSC_MUMPS_INT_MIN -2147483648 65 66 /* Cast PetscInt to PetscMUMPSInt. 
   Usually there is no overflow since <a> is row/col indices or some small integers*/
PETSC_STATIC_INLINE PetscErrorCode PetscMUMPSIntCast(PetscInt a,PetscMUMPSInt *b)
{
  PetscFunctionBegin;
  /* Range check is compiled in only for 64-bit PetscInt and only enforced in debug mode */
  if (PetscDefined(USE_64BIT_INDICES) && PetscUnlikelyDebug(a > PETSC_MUMPS_INT_MAX || a < PETSC_MUMPS_INT_MIN)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
  *b = (PetscMUMPSInt)(a);
  PetscFunctionReturn(0);
}

/* Put these utility routines here since they are only used in this file */
/* Reads a PetscMUMPSInt from the options database, clamped to [lb,ub]; wraps PetscOptionsInt_Private */
PETSC_STATIC_INLINE PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject,const char opt[],const char text[],const char man[],PetscMUMPSInt currentvalue,PetscMUMPSInt *value,PetscBool *set,PetscMUMPSInt lb,PetscMUMPSInt ub)
{
  PetscErrorCode ierr;
  PetscInt       myval;
  PetscBool      myset;
  PetscFunctionBegin;
  /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
  ierr = PetscOptionsInt_Private(PetscOptionsObject,opt,text,man,(PetscInt)currentvalue,&myval,&myset,lb,ub);CHKERRQ(ierr);
  if (myset) {ierr = PetscMUMPSIntCast(myval,value);CHKERRQ(ierr);}
  if (set) *set = myset;
  PetscFunctionReturn(0);
}
/* Convenience wrapper with the full PetscMUMPSInt range as bounds; expects PetscOptionsObject in scope */
#define PetscOptionsMUMPSInt(a,b,c,d,e,f) PetscOptionsMUMPSInt_Private(PetscOptionsObject,a,b,c,d,e,f,PETSC_MUMPS_INT_MIN,PETSC_MUMPS_INT_MAX)

/* if using PETSc OpenMP support, we only call MUMPS on master ranks.
   Before/after the call, we change/restore CPUs the master ranks can run on */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
/* Invoke MUMPS on the omp master rank only, then broadcast the (global) results to the
   other ranks of the shared-memory omp_comm. Expects a local PetscErrorCode ierr in scope. */
#define PetscMUMPS_c(mumps) \
  do { \
    if (mumps->use_petsc_omp_support) { \
      if (mumps->is_omp_master) { \
        ierr = PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl);CHKERRQ(ierr); \
        MUMPS_c(&mumps->id); \
        ierr = PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl);CHKERRQ(ierr); \
      } \
      ierr = PetscOmpCtrlBarrier(mumps->omp_ctrl);CHKERRQ(ierr); \
      /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \
         to processes, so we only Bcast info[1], an error code and leave others (since they do not have \
         an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \
         omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \
      */ \
      ierr = MPI_Bcast(mumps->id.infog, 40,MPIU_MUMPSINT, 0,mumps->omp_comm);CHKERRMPI(ierr);\
      ierr = MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL, 0,mumps->omp_comm);CHKERRMPI(ierr);\
      ierr = MPI_Bcast(mumps->id.info, 1, MPIU_MUMPSINT, 0,mumps->omp_comm);CHKERRMPI(ierr);\
    } else { \
      MUMPS_c(&mumps->id); \
    } \
  } while (0)
#else
/* Without OpenMP support every rank calls MUMPS directly */
#define PetscMUMPS_c(mumps) \
  do { MUMPS_c(&mumps->id); } while (0)
#endif

/* declare MumpsScalar: the MUMPS scalar type matching PETSc's scalar configuration */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MumpsScalar mumps_complex
#else
#define MumpsScalar mumps_double_complex
#endif
#else
#define MumpsScalar PetscScalar
#endif

/* macros s.t.
   indices match MUMPS documentation */
#define ICNTL(I) icntl[(I)-1]
#define CNTL(I) cntl[(I)-1]
#define INFOG(I) infog[(I)-1]
#define INFO(I) info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I) rinfo[(I)-1]

/* Private data of a MUMPS factored matrix: the MUMPS handle (id) plus the triplet
   representation of the PETSc matrix and various solve-time work objects */
typedef struct Mat_MUMPS Mat_MUMPS;
struct Mat_MUMPS {
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;
#else
  ZMUMPS_STRUC_C id;
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
#else
  DMUMPS_STRUC_C id;
#endif
#endif

  MatStructure   matstruc;
  PetscMPIInt    myid,petsc_size;
  PetscMUMPSInt  *irn,*jcn;        /* the (i,j,v) triplets passed to mumps. */
  PetscScalar    *val,*val_alloc;  /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
  PetscInt64     nnz;              /* number of nonzeros. The type is called selective 64-bit in mumps */
  PetscMUMPSInt  sym;
  MPI_Comm       mumps_comm;
  PetscMUMPSInt  ICNTL9_pre;       /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
  PetscMUMPSInt  ICNTL20;          /* use centralized (0) or distributed (10) dense RHS */
  PetscMUMPSInt  lrhs_loc,nloc_rhs,*irhs_loc;
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  PetscInt       *rhs_nrow,max_nrhs;
  PetscMPIInt    *rhs_recvcounts,*rhs_disps;
  PetscScalar    *rhs_loc,*rhs_recvbuf;
#endif
  Vec            b_seq,x_seq;
  PetscInt       ninfo,*info;      /* which INFO to display */
  PetscInt       sizeredrhs;
  PetscScalar    *schur_sol;
  PetscInt       schur_sizesol;
  PetscMUMPSInt  *ia_alloc,*ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
  PetscInt64     cur_ilen,cur_jlen;   /* current len of ia_alloc[], ja_alloc[] */
  PetscErrorCode (*ConvertToTriples)(Mat,PetscInt,MatReuse,Mat_MUMPS*);

  /* stuff used by petsc/mumps OpenMP support*/
  PetscBool      use_petsc_omp_support;
  PetscOmpCtrl   omp_ctrl;             /* an OpenMP controler that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */
  MPI_Comm       petsc_comm,omp_comm;  /* petsc_comm is petsc matrix's comm */
  PetscInt64     *recvcount;           /* a collection of nnz on omp_master */
  PetscMPIInt    tag,omp_comm_size;
  PetscBool      is_omp_master;        /* is this rank the master of omp_comm */
  MPI_Request    *reqs;
};

/* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
   Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
*/
static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps,PetscInt nrow,PetscInt *ia,PetscInt *ja,PetscMUMPSInt **ia_mumps,PetscMUMPSInt **ja_mumps,PetscMUMPSInt *nnz_mumps)
{
  PetscErrorCode ierr;
  PetscInt       nnz=ia[nrow]-1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */

  PetscFunctionBegin;
#if defined(PETSC_USE_64BIT_INDICES)
  {
    /* With 64-bit PetscInt the arrays must be narrowed element-by-element into cached buffers */
    PetscInt i;
    if (nrow+1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
      ierr = PetscFree(mumps->ia_alloc);CHKERRQ(ierr);
      ierr = PetscMalloc1(nrow+1,&mumps->ia_alloc);CHKERRQ(ierr);
      mumps->cur_ilen = nrow+1;
    }
    if (nnz > mumps->cur_jlen) {
      ierr = PetscFree(mumps->ja_alloc);CHKERRQ(ierr);
      ierr = PetscMalloc1(nnz,&mumps->ja_alloc);CHKERRQ(ierr);
      mumps->cur_jlen = nnz;
    }
    for (i=0; i<nrow+1; i++) {ierr = PetscMUMPSIntCast(ia[i],&(mumps->ia_alloc[i]));CHKERRQ(ierr);}
    for (i=0; i<nnz; i++) {ierr = PetscMUMPSIntCast(ja[i],&(mumps->ja_alloc[i]));CHKERRQ(ierr);}
    *ia_mumps = mumps->ia_alloc;
    *ja_mumps = mumps->ja_alloc;
  }
#else
  /* With 32-bit PetscInt the types coincide; alias the input arrays directly */
  *ia_mumps = ia;
  *ja_mumps = ja;
#endif
  ierr = PetscMUMPSIntCast(nnz,nnz_mumps);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Release Schur-complement related storage held inside the MUMPS handle and reset its Schur settings */
static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr);
  ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
  ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
  mumps->id.size_schur = 0;
  mumps->id.schur_lld  = 0;
  mumps->id.ICNTL(19)  = 0; /* no Schur complement is returned */
  PetscFunctionReturn(0);
}

/* solve with rhs in mumps->id.redrhs and return in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS            *mumps=(Mat_MUMPS*)F->data;
  Mat                  S,B,X;
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;
  PetscErrorCode       ierr;

  PetscFunctionBegin;
  ierr = MatFactorFactorizeSchurComplement(F);CHKERRQ(ierr);
  ierr = MatFactorGetSchurComplement(F,&S,&schurstatus);CHKERRQ(ierr);
  /* B wraps mumps->id.redrhs (no copy): size_schur x nrhs reduced right-hand sides */
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  ierr = MatBindToCPU(B,S->boundtocpu);CHKERRQ(ierr);
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    /* X also wraps redrhs, so the triangular solves write the solution in place */
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);CHKERRQ(ierr);
    ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr);
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatMatSolveTranspose(S,B,X);CHKERRQ(ierr);
    } else {
      ierr = MatMatSolve(S,B,X);CHKERRQ(ierr);
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    /* S holds the inverse; the solution is S*B (or S^T*B), computed into a separate buffer
       schur_sol which is grown lazily and reused across solves */
    sizesol = mumps->id.nrhs*mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
      ierr = PetscMalloc1(sizesol,&mumps->schur_sol);CHKERRQ(ierr);
      mumps->schur_sizesol = sizesol;
    }
    ierr =
MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);CHKERRQ(ierr);
    ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr);
#endif
    ierr = MatProductCreateWithMat(S,B,NULL,X);CHKERRQ(ierr);
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatProductSetType(X,MATPRODUCT_AtB);CHKERRQ(ierr);
    } else {
      ierr = MatProductSetType(X,MATPRODUCT_AB);CHKERRQ(ierr);
    }
    ierr = MatProductSetFromOptions(X);CHKERRQ(ierr);
    ierr = MatProductSymbolic(X);CHKERRQ(ierr);
    ierr = MatProductNumeric(X);CHKERRQ(ierr);

    /* copy the product back into redrhs (wrapped by B), where MUMPS expects the result */
    ierr = MatCopy(X,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %d",F->schur_status);
  }
  ierr = MatFactorRestoreSchurComplement(F,&S,schurstatus);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = MatDestroy(&X);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Drive the MUMPS reduced-RHS (Schur) workflow around a solve: with expansion=PETSC_FALSE set up
   the condensation step (ICNTL(26)=1); with expansion=PETSC_TRUE solve the Schur system and run
   the expansion step (ICNTL(26)=2). No-op if no Schur complement was requested. */
static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)F->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
    PetscFunctionReturn(0);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = mumps->id.size_schur;
      ierr = PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);CHKERRQ(ierr);
      mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs;
    }
    mumps->id.ICNTL(26) = 1; /* condensation phase */
  } else { /* prepare for the expansion step */
    /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
    ierr = MatMumpsSolveSchur_Private(F);CHKERRQ(ierr);
    mumps->id.ICNTL(26) = 2; /* expansion phase */
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
    if (mumps->id.nrhs > 1) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = 0;
      mumps->sizeredrhs = 0;
    }
  }
  PetscFunctionReturn(0);
}

/*
  MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz]

  input:
    A       - matrix in aij,baij or sbaij format
    shift   - 0: C style output triple; 1: Fortran style output triple.
    reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
              MAT_REUSE_MATRIX:   only the values in v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and col index, matrix values (matrix triples)

  The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
  freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means
  that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
*/

/* SeqAIJ -> AIJ triples. Values are referenced directly from the matrix (no value buffer);
   only row/col index arrays are built on MAT_INITIAL_MATRIX. */
PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscScalar *av;
  const PetscInt    *ai,*aj,*ajj,M=A->rmap->n;
  PetscInt64        nz,rnz,i,j,k;
  PetscErrorCode    ierr;
  PetscMUMPSInt     *row,*col;
  Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;

  PetscFunctionBegin;
  ierr = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr);
  mumps->val = (PetscScalar*)av;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz;
    ai   = aa->i;
    aj   = aa->j;
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    for (i=k=0; i<M; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        ierr = PetscMUMPSIntCast(i+shift,&row[k]);CHKERRQ(ierr);
        ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[k]);CHKERRQ(ierr);
        k++;
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* SeqSELL -> AIJ triples. Row indices are recovered from the slice structure
   (slices of height 8, hence the (r+1)&0x07 cycling); values are referenced directly. */
PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  PetscErrorCode ierr;
  PetscInt64     nz,i,j,k,r;
  Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
  PetscMUMPSInt  *row,*col;

  PetscFunctionBegin;
  mumps->val = a->val;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = a->sliidx[a->totalslices];
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    for (i=k=0; i<a->totalslices; i++) {
      for (j=a->sliidx[i],r=0; j<a->sliidx[i+1]; j++,r=((r+1)&0x07)) {
        ierr = PetscMUMPSIntCast(8*i+r+shift,&row[k++]);CHKERRQ(ierr);
      }
    }
    for (i=0;i<nz;i++) {ierr = PetscMUMPSIntCast(a->colidx[i]+shift,&col[i]);CHKERRQ(ierr);}
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscFunctionReturn(0);
}

/* SeqBAIJ -> AIJ triples: each bs x bs block is expanded into bs2 scalar entries */
PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  Mat_SeqBAIJ    *aa=(Mat_SeqBAIJ*)A->data;
  const PetscInt
                 *ai,*aj,*ajj,bs2 = aa->bs2;
  PetscInt64     M,nz,idx=0,rnz,i,j,k,m;
  PetscInt       bs;
  PetscErrorCode ierr;
  PetscMUMPSInt  *row,*col;

  PetscFunctionBegin;
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  M    = A->rmap->N/bs;
  mumps->val = aa->a; /* block values are already stored contiguously in the needed order */
  if (reuse == MAT_INITIAL_MATRIX) {
    ai = aa->i; aj = aa->j;
    nz = bs2*aa->nz;
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    for (i=0; i<M; i++) {
      ajj = aj + ai[i];
      rnz = ai[i+1] - ai[i];
      for (k=0; k<rnz; k++) {
        /* expand block (i,ajj[k]) into bs*bs scalar triples, column-major within the block */
        for (j=0; j<bs; j++) {
          for (m=0; m<bs; m++) {
            ierr = PetscMUMPSIntCast(i*bs + m + shift,&row[idx]);CHKERRQ(ierr);
            ierr = PetscMUMPSIntCast(bs*ajj[k] + j + shift,&col[idx]);CHKERRQ(ierr);
            idx++;
          }
        }
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscFunctionReturn(0);
}

/* SeqSBAIJ -> SBAIJ triples: only the upper-triangular scalar entries of each stored block
   are emitted; for bs>1 values must be gathered into a separate buffer (val_alloc). */
PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt *ai, *aj,*ajj;
  PetscInt       bs;
  PetscInt64     nz,rnz,i,j,k,m;
  PetscErrorCode ierr;
  PetscMUMPSInt  *row,*col;
  PetscScalar    *val;
  Mat_SeqSBAIJ   *aa=(Mat_SeqSBAIJ*)A->data;
  const PetscInt bs2=aa->bs2,mbs=aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool      hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
  if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
#endif
  ai = aa->i;
  aj = aa->j;
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz; /* number of stored blocks; bs2*nz over-estimates the scalar count for bs>1 */
    ierr = PetscMalloc2(bs2*nz,&row,bs2*nz,&col);CHKERRQ(ierr);
    if (bs>1) {
      ierr = PetscMalloc1(bs2*nz,&mumps->val_alloc);CHKERRQ(ierr);
      mumps->val = mumps->val_alloc;
    } else {
      mumps->val = aa->a;
    }
    mumps->irn = row;
    mumps->jcn = col;
  } else {
    if
 (bs == 1) mumps->val = aa->a;
    row = mumps->irn;
    col = mumps->jcn;
  }
  val = mumps->val;

  nz = 0;
  if (bs>1) {
    for (i=0; i<mbs; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            /* keep only entries on or above the global diagonal: whole off-diagonal blocks,
               and the upper triangle (k>=m) of diagonal blocks */
            if (ajj[j]>i || k>=m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                ierr = PetscMUMPSIntCast(i*bs + m + shift,&row[nz]);CHKERRQ(ierr);
                ierr = PetscMUMPSIntCast(ajj[j]*bs + k + shift,&col[nz]);CHKERRQ(ierr);
              }
              val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs];
            }
          }
        }
      }
    }
  } else if (reuse == MAT_INITIAL_MATRIX) {
    /* bs==1: values are aliased to aa->a, so only index arrays are filled (and only once) */
    for (i=0; i<mbs; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
        ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
        nz++;
      }
    }
    if (PetscUnlikely(nz != aa->nz)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT,nz,aa->nz);
  }
  if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
  PetscFunctionReturn(0);
}

/* SeqAIJ -> SBAIJ triples: extract the upper-triangular part (including diagonal) of an AIJ
   matrix for a symmetric (Cholesky) factorization. Handles rows with missing diagonal entries. */
PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai,*aj,*ajj,*adiag,M=A->rmap->n;
  PetscInt64        nz,rnz,i,j;
  const PetscScalar *av,*v1;
  PetscScalar       *val;
  PetscErrorCode    ierr;
  PetscMUMPSInt     *row,*col;
  Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
  PetscBool         missing;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
  if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
#endif
  ierr = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr);
  ai = aa->i; aj = aa->j;
  adiag = aa->diag;
  ierr =
MatMissingDiagonal_SeqAIJ(A,&missing,NULL);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    /* count nz in the upper triangular part of A */
    nz = 0;
    if (missing) {
      /* some rows lack a diagonal entry: adiag[i] is not reliable there, so scan column indices */
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            nz++;
          }
        } else {
          nz += ai[i+1] - adiag[i];
        }
      }
    } else {
      for (i=0; i<M; i++) nz += ai[i+1] - adiag[i];
    }
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;

    /* second pass: fill indices and values */
    nz = 0;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
            ierr = PetscMUMPSIntCast(aj[j]+shift,&col[nz]);CHKERRQ(ierr);
            val[nz] = av[j];
            nz++;
          }
        } else {
          rnz = ai[i+1] - adiag[i];
          ajj = aj + adiag[i];
          v1  = av + adiag[i];
          for (j=0; j<rnz; j++) {
            ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
            ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i=0; i<M; i++) {
        rnz = ai[i+1] - adiag[i];
        ajj = aj + adiag[i];
        v1  = av + adiag[i];
        for (j=0; j<rnz; j++) {
          ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
          ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
          val[nz++] = v1[j];
        }
      }
    }
  } else {
    /* MAT_REUSE_MATRIX: indices are unchanged; refresh only the values in the same order */
    nz  = 0;
    val = mumps->val;
    if (missing) {
      for (i=0; i <M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            val[nz++] = av[j];
          }
        } else {
          rnz = ai[i+1] - adiag[i];
          v1  = av + adiag[i];
          for (j=0; j<rnz; j++) {
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for
 (i=0; i <M; i++) {
        rnz = ai[i+1] - adiag[i];
        v1  = av + adiag[i];
        for (j=0; j<rnz; j++) {
          val[nz++] = v1[j];
        }
      }
    }
  }
  ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* MPISBAIJ -> SBAIJ triples: merge the diagonal (A) and off-diagonal (B) blocks of the local
   rows into global-index triples; for bs>1 only upper-triangular scalar entries are kept,
   so mumps->nnz is determined at the end of the scan. */
PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  PetscErrorCode    ierr;
  const PetscInt    *ai,*aj,*bi,*bj,*garray,*ajj,*bjj;
  PetscInt          bs;
  PetscInt64        rstart,nz,i,j,k,m,jj,irow,countA,countB;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av,*bv,*v1,*v2;
  PetscScalar       *val;
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ*)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
  const PetscInt    bs2=aa->bs2,mbs=aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
  if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
#endif
  ierr   = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  rstart = A->rmap->rstart;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;
  av = aa->a;
  bv = bb->a;

  garray = mat->garray; /* maps local off-diagonal block columns to global block columns */

  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = (aa->nz+bb->nz)*bs2; /* just a conservative estimate */
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
    /* can not decide the exact mumps->nnz now because of the SBAIJ */
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<mbs; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i]*bs2;
    v2     = bv + bi[i]*bs2;

    if (bs>1) {
      /* A-part */
      for (j=0; j<countA; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            /* keep off-diagonal blocks entirely, and the upper triangle of diagonal blocks */
            if (rstart + ajj[j]*bs>irow || k>=m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                ierr = PetscMUMPSIntCast(irow + m + shift,&row[jj]);CHKERRQ(ierr);
                ierr = PetscMUMPSIntCast(rstart + ajj[j]*bs + k + shift,&col[jj]);CHKERRQ(ierr);
              }
              val[jj++] = v1[j*bs2 + m + k*bs];
            }
          }
        }
      }

      /* B-part */
      for (j=0; j < countB; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            if (reuse == MAT_INITIAL_MATRIX) {
              ierr = PetscMUMPSIntCast(irow + m + shift,&row[jj]);CHKERRQ(ierr);
              ierr = PetscMUMPSIntCast(garray[bjj[j]]*bs + k + shift,&col[jj]);CHKERRQ(ierr);
            }
            val[jj++] = v2[j*bs2 + m + k*bs];
          }
        }
      }
    } else {
      /* A-part */
      for (j=0; j<countA; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
          ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr);
        }
        val[jj++] = v1[j];
      }

      /* B-part */
      for (j=0; j < countB; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
          ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr);
        }
        val[jj++] = v2[j];
      }
    }
    irow+=bs;
  }
  mumps->nnz = jj; /* actual count, possibly below the conservative allocation */
  PetscFunctionReturn(0);
}

/* MPIAIJ -> AIJ triples: concatenate the diagonal (A) and off-diagonal (B) parts of each
   local row, translating local column indices to global ones */
PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt64        rstart,nz,i,j,jj,irow,countA,countB;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av, *bv,*v1,*v2;
  PetscScalar       *val;
  Mat               Ad,Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;

  PetscFunctionBegin;
  ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
  ierr =
MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr);

  aa = (Mat_SeqAIJ*)(Ad)->data;
  bb = (Mat_SeqAIJ*)(Ao)->data;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
        ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr);
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j=0; j < countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
        ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr);
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* MPIBAIJ -> AIJ triples: expand each bs x bs block of the diagonal (A) and off-diagonal (B)
   parts into scalar triples with global indices */
PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ*)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
  const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj;
  const PetscInt    *garray =
mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart;
  const PetscInt    bs2=mat->bs2;
  PetscErrorCode    ierr;
  PetscInt          bs;
  PetscInt64        nz,i,j,k,n,jj,irow,countA,countB,idx;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2;
  PetscScalar       *val;

  PetscFunctionBegin;
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = bs2*(aa->nz + bb->nz); /* every block contributes exactly bs2 scalar entries */
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<mbs; i++) {
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    ajj    = aj + ai[i];
    bjj    = bj + bi[i];
    v1     = av + bs2*ai[i];
    v2     = bv + bs2*bi[i];

    idx = 0;
    /* A-part */
    for (k=0; k<countA; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            ierr = PetscMUMPSIntCast(irow + n + shift,&row[jj]);CHKERRQ(ierr);
            ierr = PetscMUMPSIntCast(rstart + bs*ajj[k] + j + shift,&col[jj]);CHKERRQ(ierr);
          }
          val[jj++] = v1[idx++];
        }
      }
    }

    idx = 0;
    /* B-part */
    for (k=0; k<countB; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            ierr = PetscMUMPSIntCast(irow + n + shift,&row[jj]);CHKERRQ(ierr);
            ierr = PetscMUMPSIntCast(bs*garray[bjj[k]] + j + shift,&col[jj]);CHKERRQ(ierr);
          }
          val[jj++] = v2[idx++];
        }
      }
    }
    irow += bs;
  }
  PetscFunctionReturn(0);
}

/* MPIAIJ -> SBAIJ triples: extract the upper-triangular part of a distributed AIJ matrix for
   a symmetric (Cholesky) factorization; off-diagonal entries are kept only when their global
   column lies to the right of this rank's row block */
PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt64        rstart,nz,nza,nzb,i,j,jj,irow,countA,countB;
  PetscMUMPSInt     *row,*col;
  const
 PetscScalar *av, *bv,*v1,*v2;
  PetscScalar       *val;
  Mat               Ad,Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
  if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
#endif
  ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr);

  aa = (Mat_SeqAIJ*)(Ad)->data;
  bb = (Mat_SeqAIJ*)(Ao)->data;
  ai = aa->i;
  aj = aa->j;
  adiag = aa->diag;
  bi = bb->i;
  bj = bb->j;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
    nzb = 0; /* num of upper triangular entries in mat->B */
    for (i=0; i<m; i++) {
      nza   += (ai[i+1] - adiag[i]);
      countB = bi[i+1] - bi[i];
      bjj    = bj + bi[i];
      for (j=0; j<countB; j++) {
        if (garray[bjj[j]] > rstart) nzb++;
      }
    }

    nz   = nza + nzb; /* total nz of upper triangular part of mat */
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
    v1     = av + adiag[i];
    countA = ai[i+1] - adiag[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
        ierr = PetscMUMPSIntCast(rstart +
ajj[j] + shift,&col[jj]);CHKERRQ(ierr); 958 } 959 val[jj++] = v1[j]; 960 } 961 962 /* B-part */ 963 for (j=0; j < countB; j++) { 964 if (garray[bjj[j]] > rstart) { 965 if (reuse == MAT_INITIAL_MATRIX) { 966 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 967 ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr); 968 } 969 val[jj++] = v2[j]; 970 } 971 } 972 irow++; 973 } 974 ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr); 975 ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr); 976 PetscFunctionReturn(0); 977 } 978 979 PetscErrorCode MatDestroy_MUMPS(Mat A) 980 { 981 PetscErrorCode ierr; 982 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 983 984 PetscFunctionBegin; 985 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 986 ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr); 987 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 988 ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr); 989 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 990 ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr); 991 ierr = PetscFree2(mumps->irn,mumps->jcn);CHKERRQ(ierr); 992 ierr = PetscFree(mumps->val_alloc);CHKERRQ(ierr); 993 ierr = PetscFree(mumps->info);CHKERRQ(ierr); 994 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 995 mumps->id.job = JOB_END; 996 PetscMUMPS_c(mumps); 997 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d",mumps->id.INFOG(1)); 998 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 999 if (mumps->use_petsc_omp_support) { 1000 ierr = PetscOmpCtrlDestroy(&mumps->omp_ctrl);CHKERRQ(ierr); 1001 ierr = PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);CHKERRQ(ierr); 1002 ierr = PetscFree3(mumps->rhs_nrow,mumps->rhs_recvcounts,mumps->rhs_disps);CHKERRQ(ierr); 1003 } 1004 #endif 1005 ierr = PetscFree(mumps->ia_alloc);CHKERRQ(ierr); 1006 ierr = PetscFree(mumps->ja_alloc);CHKERRQ(ierr); 1007 ierr = PetscFree(mumps->recvcount);CHKERRQ(ierr); 1008 
ierr = PetscFree(mumps->reqs);CHKERRQ(ierr); 1009 ierr = PetscFree(mumps->irhs_loc);CHKERRQ(ierr); 1010 if (mumps->mumps_comm != MPI_COMM_NULL) {ierr = PetscCommRestoreComm(PetscObjectComm((PetscObject)A),&mumps->mumps_comm);CHKERRQ(ierr);} 1011 ierr = PetscFree(A->data);CHKERRQ(ierr); 1012 1013 /* clear composed functions */ 1014 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr); 1015 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr); 1016 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr); 1017 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr); 1018 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);CHKERRQ(ierr); 1019 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr); 1020 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);CHKERRQ(ierr); 1021 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);CHKERRQ(ierr); 1022 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);CHKERRQ(ierr); 1023 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);CHKERRQ(ierr); 1024 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);CHKERRQ(ierr); 1025 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);CHKERRQ(ierr); 1026 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);CHKERRQ(ierr); 1027 PetscFunctionReturn(0); 1028 } 1029 1030 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. 
*/ 1031 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A,PetscInt nrhs,const PetscScalar *array) 1032 { 1033 PetscErrorCode ierr; 1034 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1035 const PetscMPIInt ompsize=mumps->omp_comm_size; 1036 PetscInt i,m,M,rstart; 1037 1038 PetscFunctionBegin; 1039 ierr = MatGetSize(A,&M,NULL);CHKERRQ(ierr); 1040 ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr); 1041 if (M > PETSC_MUMPS_INT_MAX) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt"); 1042 if (ompsize == 1) { 1043 if (!mumps->irhs_loc) { 1044 mumps->nloc_rhs = m; 1045 ierr = PetscMalloc1(m,&mumps->irhs_loc);CHKERRQ(ierr); 1046 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 1047 for (i=0; i<m; i++) mumps->irhs_loc[i] = rstart+i+1; /* use 1-based indices */ 1048 } 1049 mumps->id.rhs_loc = (MumpsScalar*)array; 1050 } else { 1051 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1052 const PetscInt *ranges; 1053 PetscMPIInt j,k,sendcount,*petsc_ranks,*omp_ranks; 1054 MPI_Group petsc_group,omp_group; 1055 PetscScalar *recvbuf=NULL; 1056 1057 if (mumps->is_omp_master) { 1058 /* Lazily initialize the omp stuff for distributed rhs */ 1059 if (!mumps->irhs_loc) { 1060 ierr = PetscMalloc2(ompsize,&omp_ranks,ompsize,&petsc_ranks);CHKERRQ(ierr); 1061 ierr = PetscMalloc3(ompsize,&mumps->rhs_nrow,ompsize,&mumps->rhs_recvcounts,ompsize,&mumps->rhs_disps);CHKERRQ(ierr); 1062 ierr = MPI_Comm_group(mumps->petsc_comm,&petsc_group);CHKERRMPI(ierr); 1063 ierr = MPI_Comm_group(mumps->omp_comm,&omp_group);CHKERRMPI(ierr); 1064 for (j=0; j<ompsize; j++) omp_ranks[j] = j; 1065 ierr = MPI_Group_translate_ranks(omp_group,ompsize,omp_ranks,petsc_group,petsc_ranks);CHKERRMPI(ierr); 1066 1067 /* Populate mumps->irhs_loc[], rhs_nrow[] */ 1068 mumps->nloc_rhs = 0; 1069 ierr = MatGetOwnershipRanges(A,&ranges);CHKERRQ(ierr); 1070 for (j=0; j<ompsize; j++) { 1071 mumps->rhs_nrow[j] = ranges[petsc_ranks[j]+1] - ranges[petsc_ranks[j]]; 1072 mumps->nloc_rhs += 
mumps->rhs_nrow[j]; 1073 } 1074 ierr = PetscMalloc1(mumps->nloc_rhs,&mumps->irhs_loc);CHKERRQ(ierr); 1075 for (j=k=0; j<ompsize; j++) { 1076 for (i=ranges[petsc_ranks[j]]; i<ranges[petsc_ranks[j]+1]; i++,k++) mumps->irhs_loc[k] = i+1; /* uses 1-based indices */ 1077 } 1078 1079 ierr = PetscFree2(omp_ranks,petsc_ranks);CHKERRQ(ierr); 1080 ierr = MPI_Group_free(&petsc_group);CHKERRMPI(ierr); 1081 ierr = MPI_Group_free(&omp_group);CHKERRMPI(ierr); 1082 } 1083 1084 /* Realloc buffers when current nrhs is bigger than what we have met */ 1085 if (nrhs > mumps->max_nrhs) { 1086 ierr = PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);CHKERRQ(ierr); 1087 ierr = PetscMalloc2(mumps->nloc_rhs*nrhs,&mumps->rhs_loc,mumps->nloc_rhs*nrhs,&mumps->rhs_recvbuf);CHKERRQ(ierr); 1088 mumps->max_nrhs = nrhs; 1089 } 1090 1091 /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */ 1092 for (j=0; j<ompsize; j++) {ierr = PetscMPIIntCast(mumps->rhs_nrow[j]*nrhs,&mumps->rhs_recvcounts[j]);CHKERRQ(ierr);} 1093 mumps->rhs_disps[0] = 0; 1094 for (j=1; j<ompsize; j++) { 1095 mumps->rhs_disps[j] = mumps->rhs_disps[j-1] + mumps->rhs_recvcounts[j-1]; 1096 if (mumps->rhs_disps[j] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscMPIInt overflow!"); 1097 } 1098 recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. 
Single rhs is common in Ax=b */ 1099 } 1100 1101 ierr = PetscMPIIntCast(m*nrhs,&sendcount);CHKERRQ(ierr); 1102 ierr = MPI_Gatherv(array,sendcount,MPIU_SCALAR,recvbuf,mumps->rhs_recvcounts,mumps->rhs_disps,MPIU_SCALAR,0,mumps->omp_comm);CHKERRMPI(ierr); 1103 1104 if (mumps->is_omp_master) { 1105 if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */ 1106 PetscScalar *dst,*dstbase = mumps->rhs_loc; 1107 for (j=0; j<ompsize; j++) { 1108 const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j]; 1109 dst = dstbase; 1110 for (i=0; i<nrhs; i++) { 1111 ierr = PetscArraycpy(dst,src,mumps->rhs_nrow[j]);CHKERRQ(ierr); 1112 src += mumps->rhs_nrow[j]; 1113 dst += mumps->nloc_rhs; 1114 } 1115 dstbase += mumps->rhs_nrow[j]; 1116 } 1117 } 1118 mumps->id.rhs_loc = (MumpsScalar*)mumps->rhs_loc; 1119 } 1120 #endif /* PETSC_HAVE_OPENMP_SUPPORT */ 1121 } 1122 mumps->id.nrhs = nrhs; 1123 mumps->id.nloc_rhs = mumps->nloc_rhs; 1124 mumps->id.lrhs_loc = mumps->nloc_rhs; 1125 mumps->id.irhs_loc = mumps->irhs_loc; 1126 PetscFunctionReturn(0); 1127 } 1128 1129 PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x) 1130 { 1131 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1132 const PetscScalar *rarray = NULL; 1133 PetscScalar *array; 1134 IS is_iden,is_petsc; 1135 PetscErrorCode ierr; 1136 PetscInt i; 1137 PetscBool second_solve = PETSC_FALSE; 1138 static PetscBool cite1 = PETSC_FALSE,cite2 = PETSC_FALSE; 1139 1140 PetscFunctionBegin; 1141 ierr = PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1);CHKERRQ(ierr); 1142 ierr = PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. 
L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",&cite2);CHKERRQ(ierr); 1143 1144 if (A->factorerrortype) { 1145 ierr = PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1146 ierr = VecSetInf(x);CHKERRQ(ierr); 1147 PetscFunctionReturn(0); 1148 } 1149 1150 mumps->id.nrhs = 1; 1151 if (mumps->petsc_size > 1) { 1152 if (mumps->ICNTL20 == 10) { 1153 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1154 ierr = VecGetArrayRead(b,&rarray);CHKERRQ(ierr); 1155 ierr = MatMumpsSetUpDistRHSInfo(A,1,rarray);CHKERRQ(ierr); 1156 } else { 1157 mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/ 1158 ierr = VecScatterBegin(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1159 ierr = VecScatterEnd(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1160 if (!mumps->myid) { 1161 ierr = VecGetArray(mumps->b_seq,&array);CHKERRQ(ierr); 1162 mumps->id.rhs = (MumpsScalar*)array; 1163 } 1164 } 1165 } else { /* petsc_size == 1 */ 1166 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1167 ierr = VecCopy(b,x);CHKERRQ(ierr); 1168 ierr = VecGetArray(x,&array);CHKERRQ(ierr); 1169 mumps->id.rhs = (MumpsScalar*)array; 1170 } 1171 1172 /* 1173 handle condensation step of Schur complement (if any) 1174 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 1175 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 1176 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 
1177 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 1178 */ 1179 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1180 if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc"); 1181 second_solve = PETSC_TRUE; 1182 ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr); 1183 } 1184 /* solve phase */ 1185 /*-------------*/ 1186 mumps->id.job = JOB_SOLVE; 1187 PetscMUMPS_c(mumps); 1188 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1)); 1189 1190 /* handle expansion step of Schur complement (if any) */ 1191 if (second_solve) { 1192 ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr); 1193 } 1194 1195 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 1196 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 1197 /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 1198 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 1199 } 1200 if (!mumps->scat_sol) { /* create scatter scat_sol */ 1201 PetscInt *isol2_loc=NULL; 1202 ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */ 1203 ierr = PetscMalloc1(mumps->id.lsol_loc,&isol2_loc);CHKERRQ(ierr); 1204 for (i=0; i<mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i]-1; /* change Fortran style to C style */ 1205 ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,isol2_loc,PETSC_OWN_POINTER,&is_petsc);CHKERRQ(ierr); /* to */ 1206 ierr = VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr); 1207 ierr = ISDestroy(&is_iden);CHKERRQ(ierr); 1208 ierr = ISDestroy(&is_petsc);CHKERRQ(ierr); 1209 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) 
*/ 1210 } 1211 1212 ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1213 ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1214 } 1215 1216 if (mumps->petsc_size > 1) { 1217 if (mumps->ICNTL20 == 10) { 1218 ierr = VecRestoreArrayRead(b,&rarray);CHKERRQ(ierr); 1219 } else if (!mumps->myid) { 1220 ierr = VecRestoreArray(mumps->b_seq,&array);CHKERRQ(ierr); 1221 } 1222 } else {ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);} 1223 1224 ierr = PetscLogFlops(2.0*mumps->id.RINFO(3));CHKERRQ(ierr); 1225 PetscFunctionReturn(0); 1226 } 1227 1228 PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x) 1229 { 1230 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1231 PetscErrorCode ierr; 1232 1233 PetscFunctionBegin; 1234 mumps->id.ICNTL(9) = 0; 1235 ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr); 1236 mumps->id.ICNTL(9) = 1; 1237 PetscFunctionReturn(0); 1238 } 1239 1240 PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X) 1241 { 1242 PetscErrorCode ierr; 1243 Mat Bt = NULL; 1244 PetscBool denseX,denseB,flg,flgT; 1245 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1246 PetscInt i,nrhs,M; 1247 PetscScalar *array; 1248 const PetscScalar *rbray; 1249 PetscInt lsol_loc,nlsol_loc,*idxx,iidx = 0; 1250 PetscMUMPSInt *isol_loc,*isol_loc_save; 1251 PetscScalar *bray,*sol_loc,*sol_loc_save; 1252 IS is_to,is_from; 1253 PetscInt k,proc,j,m,myrstart; 1254 const PetscInt *rstart; 1255 Vec v_mpi,msol_loc; 1256 VecScatter scat_sol; 1257 Vec b_seq; 1258 VecScatter scat_rhs; 1259 PetscScalar *aa; 1260 PetscInt spnr,*ia,*ja; 1261 Mat_MPIAIJ *b = NULL; 1262 1263 PetscFunctionBegin; 1264 ierr = PetscObjectTypeCompareAny((PetscObject)X,&denseX,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 1265 if (!denseX) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix"); 1266 1267 ierr = PetscObjectTypeCompareAny((PetscObject)B,&denseB,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 1268 if 
(denseB) { 1269 if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution"); 1270 mumps->id.ICNTL(20)= 0; /* dense RHS */ 1271 } else { /* sparse B */ 1272 if (X == B) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_IDN,"X and B must be different matrices"); 1273 ierr = PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);CHKERRQ(ierr); 1274 if (flgT) { /* input B is transpose of actural RHS matrix, 1275 because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */ 1276 ierr = MatTransposeGetMat(B,&Bt);CHKERRQ(ierr); 1277 } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix"); 1278 mumps->id.ICNTL(20)= 1; /* sparse RHS */ 1279 } 1280 1281 ierr = MatGetSize(B,&M,&nrhs);CHKERRQ(ierr); 1282 mumps->id.nrhs = nrhs; 1283 mumps->id.lrhs = M; 1284 mumps->id.rhs = NULL; 1285 1286 if (mumps->petsc_size == 1) { 1287 PetscScalar *aa; 1288 PetscInt spnr,*ia,*ja; 1289 PetscBool second_solve = PETSC_FALSE; 1290 1291 ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr); 1292 mumps->id.rhs = (MumpsScalar*)array; 1293 1294 if (denseB) { 1295 /* copy B to X */ 1296 ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr); 1297 ierr = PetscArraycpy(array,rbray,M*nrhs);CHKERRQ(ierr); 1298 ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr); 1299 } else { /* sparse B */ 1300 ierr = MatSeqAIJGetArray(Bt,&aa);CHKERRQ(ierr); 1301 ierr = MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1302 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 1303 ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr); 1304 mumps->id.rhs_sparse = (MumpsScalar*)aa; 1305 } 1306 /* handle condensation step of Schur complement (if any) */ 1307 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 
|| mumps->id.ICNTL(26) > 2)) { 1308 second_solve = PETSC_TRUE; 1309 ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr); 1310 } 1311 /* solve phase */ 1312 /*-------------*/ 1313 mumps->id.job = JOB_SOLVE; 1314 PetscMUMPS_c(mumps); 1315 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1)); 1316 1317 /* handle expansion step of Schur complement (if any) */ 1318 if (second_solve) { 1319 ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr); 1320 } 1321 if (!denseB) { /* sparse B */ 1322 ierr = MatSeqAIJRestoreArray(Bt,&aa);CHKERRQ(ierr); 1323 ierr = MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1324 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 1325 } 1326 ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr); 1327 PetscFunctionReturn(0); 1328 } 1329 1330 /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! 
--------*/ 1331 if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc"); 1332 1333 /* create msol_loc to hold mumps local solution */ 1334 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1335 sol_loc_save = (PetscScalar*)mumps->id.sol_loc; 1336 1337 lsol_loc = mumps->id.lsol_loc; 1338 nlsol_loc = nrhs*lsol_loc; /* length of sol_loc */ 1339 ierr = PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);CHKERRQ(ierr); 1340 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1341 mumps->id.isol_loc = isol_loc; 1342 1343 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);CHKERRQ(ierr); 1344 1345 if (denseB) { 1346 if (mumps->ICNTL20 == 10) { 1347 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1348 ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr); 1349 ierr = MatMumpsSetUpDistRHSInfo(A,nrhs,rbray);CHKERRQ(ierr); 1350 ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr); 1351 ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr); 1352 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,NULL,&v_mpi);CHKERRQ(ierr); 1353 } else { 1354 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1355 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1356 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1357 0, re-arrange B into desired order, which is a local operation. 
1358 */ 1359 1360 /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 1361 /* wrap dense rhs matrix B into a vector v_mpi */ 1362 ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr); 1363 ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr); 1364 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr); 1365 ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr); 1366 1367 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */ 1368 if (!mumps->myid) { 1369 PetscInt *idx; 1370 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1371 ierr = PetscMalloc1(nrhs*M,&idx);CHKERRQ(ierr); 1372 ierr = MatGetOwnershipRanges(B,&rstart);CHKERRQ(ierr); 1373 k = 0; 1374 for (proc=0; proc<mumps->petsc_size; proc++) { 1375 for (j=0; j<nrhs; j++) { 1376 for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i; 1377 } 1378 } 1379 1380 ierr = VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);CHKERRQ(ierr); 1381 ierr = ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr); 1382 ierr = ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);CHKERRQ(ierr); 1383 } else { 1384 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);CHKERRQ(ierr); 1385 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);CHKERRQ(ierr); 1386 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);CHKERRQ(ierr); 1387 } 1388 ierr = VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);CHKERRQ(ierr); 1389 ierr = VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1390 ierr = ISDestroy(&is_to);CHKERRQ(ierr); 1391 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1392 ierr = VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1393 1394 if (!mumps->myid) { /* define rhs on the host */ 1395 ierr = VecGetArray(b_seq,&bray);CHKERRQ(ierr); 1396 mumps->id.rhs = (MumpsScalar*)bray; 1397 ierr = 
VecRestoreArray(b_seq,&bray);CHKERRQ(ierr); 1398 } 1399 } 1400 } else { /* sparse B */ 1401 b = (Mat_MPIAIJ*)Bt->data; 1402 1403 /* wrap dense X into a vector v_mpi */ 1404 ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr); 1405 ierr = MatDenseGetArray(X,&bray);CHKERRQ(ierr); 1406 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr); 1407 ierr = MatDenseRestoreArray(X,&bray);CHKERRQ(ierr); 1408 1409 if (!mumps->myid) { 1410 ierr = MatSeqAIJGetArray(b->A,&aa);CHKERRQ(ierr); 1411 ierr = MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1412 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 1413 ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr); 1414 mumps->id.rhs_sparse = (MumpsScalar*)aa; 1415 } else { 1416 mumps->id.irhs_ptr = NULL; 1417 mumps->id.irhs_sparse = NULL; 1418 mumps->id.nz_rhs = 0; 1419 mumps->id.rhs_sparse = NULL; 1420 } 1421 } 1422 1423 /* solve phase */ 1424 /*-------------*/ 1425 mumps->id.job = JOB_SOLVE; 1426 PetscMUMPS_c(mumps); 1427 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1)); 1428 1429 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1430 ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr); 1431 ierr = VecPlaceArray(v_mpi,array);CHKERRQ(ierr); 1432 1433 /* create scatter scat_sol */ 1434 ierr = MatGetOwnershipRanges(X,&rstart);CHKERRQ(ierr); 1435 /* iidx: index for scatter mumps solution to petsc X */ 1436 1437 ierr = ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);CHKERRQ(ierr); 1438 ierr = PetscMalloc1(nlsol_loc,&idxx);CHKERRQ(ierr); 1439 for (i=0; i<lsol_loc; i++) { 1440 isol_loc[i] -= 1; /* change Fortran style to C style. 
isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1441 1442 for (proc=0; proc<mumps->petsc_size; proc++) { 1443 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) { 1444 myrstart = rstart[proc]; 1445 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1446 iidx = k + myrstart*nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1447 m = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */ 1448 break; 1449 } 1450 } 1451 1452 for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m; 1453 } 1454 ierr = ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr); 1455 ierr = VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);CHKERRQ(ierr); 1456 ierr = VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1457 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1458 ierr = ISDestroy(&is_to);CHKERRQ(ierr); 1459 ierr = VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1460 ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr); 1461 1462 /* free spaces */ 1463 mumps->id.sol_loc = (MumpsScalar*)sol_loc_save; 1464 mumps->id.isol_loc = isol_loc_save; 1465 1466 ierr = PetscFree2(sol_loc,isol_loc);CHKERRQ(ierr); 1467 ierr = PetscFree(idxx);CHKERRQ(ierr); 1468 ierr = VecDestroy(&msol_loc);CHKERRQ(ierr); 1469 ierr = VecDestroy(&v_mpi);CHKERRQ(ierr); 1470 if (!denseB) { 1471 if (!mumps->myid) { 1472 b = (Mat_MPIAIJ*)Bt->data; 1473 ierr = MatSeqAIJRestoreArray(b->A,&aa);CHKERRQ(ierr); 1474 ierr = MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1475 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 1476 } 1477 } else { 1478 if (mumps->ICNTL20 == 0) { 1479 ierr = VecDestroy(&b_seq);CHKERRQ(ierr); 1480 ierr = VecScatterDestroy(&scat_rhs);CHKERRQ(ierr); 1481 } 1482 } 1483 ierr = VecScatterDestroy(&scat_sol);CHKERRQ(ierr); 1484 ierr = 
PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));CHKERRQ(ierr); 1485 PetscFunctionReturn(0); 1486 } 1487 1488 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X) 1489 { 1490 PetscErrorCode ierr; 1491 PetscBool flg; 1492 Mat B; 1493 1494 PetscFunctionBegin; 1495 ierr = PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 1496 if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix"); 1497 1498 /* Create B=Bt^T that uses Bt's data structure */ 1499 ierr = MatCreateTranspose(Bt,&B);CHKERRQ(ierr); 1500 1501 ierr = MatMatSolve_MUMPS(A,B,X);CHKERRQ(ierr); 1502 ierr = MatDestroy(&B);CHKERRQ(ierr); 1503 PetscFunctionReturn(0); 1504 } 1505 1506 #if !defined(PETSC_USE_COMPLEX) 1507 /* 1508 input: 1509 F: numeric factor 1510 output: 1511 nneg: total number of negative pivots 1512 nzero: total number of zero pivots 1513 npos: (global dimension of F) - nneg - nzero 1514 */ 1515 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos) 1516 { 1517 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1518 PetscErrorCode ierr; 1519 PetscMPIInt size; 1520 1521 PetscFunctionBegin; 1522 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRMPI(ierr); 1523 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1524 if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. 
-mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia",mumps->id.INFOG(13)); 1525 1526 if (nneg) *nneg = mumps->id.INFOG(12); 1527 if (nzero || npos) { 1528 if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1529 if (nzero) *nzero = mumps->id.INFOG(28); 1530 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1531 } 1532 PetscFunctionReturn(0); 1533 } 1534 #endif 1535 1536 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps) 1537 { 1538 PetscErrorCode ierr; 1539 PetscInt i,nreqs; 1540 PetscMUMPSInt *irn,*jcn; 1541 PetscMPIInt count; 1542 PetscInt64 totnnz,remain; 1543 const PetscInt osize=mumps->omp_comm_size; 1544 PetscScalar *val; 1545 1546 PetscFunctionBegin; 1547 if (osize > 1) { 1548 if (reuse == MAT_INITIAL_MATRIX) { 1549 /* master first gathers counts of nonzeros to receive */ 1550 if (mumps->is_omp_master) {ierr = PetscMalloc1(osize,&mumps->recvcount);CHKERRQ(ierr);} 1551 ierr = MPI_Gather(&mumps->nnz,1,MPIU_INT64,mumps->recvcount,1,MPIU_INT64,0/*master*/,mumps->omp_comm);CHKERRMPI(ierr); 1552 1553 /* Then each computes number of send/recvs */ 1554 if (mumps->is_omp_master) { 1555 /* Start from 1 since self communication is not done in MPI */ 1556 nreqs = 0; 1557 for (i=1; i<osize; i++) nreqs += (mumps->recvcount[i]+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX; 1558 } else { 1559 nreqs = (mumps->nnz+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX; 1560 } 1561 ierr = PetscMalloc1(nreqs*3,&mumps->reqs);CHKERRQ(ierr); /* Triple the requests since we send irn, jcn and val seperately */ 1562 1563 /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 1564 MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 1565 might be a prime number > 2^31-1, we have to slice the message. 
Note omp_comm_size 1566 is very small, the current approach should have no extra overhead compared to MPI_Gatherv. 1567 */ 1568 nreqs = 0; /* counter for actual send/recvs */ 1569 if (mumps->is_omp_master) { 1570 for (i=0,totnnz=0; i<osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1571 ierr = PetscMalloc2(totnnz,&irn,totnnz,&jcn);CHKERRQ(ierr); 1572 ierr = PetscMalloc1(totnnz,&val);CHKERRQ(ierr); 1573 1574 /* Self communication */ 1575 ierr = PetscArraycpy(irn,mumps->irn,mumps->nnz);CHKERRQ(ierr); 1576 ierr = PetscArraycpy(jcn,mumps->jcn,mumps->nnz);CHKERRQ(ierr); 1577 ierr = PetscArraycpy(val,mumps->val,mumps->nnz);CHKERRQ(ierr); 1578 1579 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1580 ierr = PetscFree2(mumps->irn,mumps->jcn);CHKERRQ(ierr); 1581 ierr = PetscFree(mumps->val_alloc);CHKERRQ(ierr); 1582 mumps->nnz = totnnz; 1583 mumps->irn = irn; 1584 mumps->jcn = jcn; 1585 mumps->val = mumps->val_alloc = val; 1586 1587 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1588 jcn += mumps->recvcount[0]; 1589 val += mumps->recvcount[0]; 1590 1591 /* Remote communication */ 1592 for (i=1; i<osize; i++) { 1593 count = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX); 1594 remain = mumps->recvcount[i] - count; 1595 while (count>0) { 1596 ierr = MPI_Irecv(irn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr); 1597 ierr = MPI_Irecv(jcn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr); 1598 ierr = MPI_Irecv(val,count,MPIU_SCALAR, i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr); 1599 irn += count; 1600 jcn += count; 1601 val += count; 1602 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1603 remain -= count; 1604 } 1605 } 1606 } else { 1607 irn = mumps->irn; 1608 jcn = mumps->jcn; 1609 val = mumps->val; 1610 count = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX); 1611 remain = mumps->nnz - 
count; 1612 while (count>0) { 1613 ierr = MPI_Isend(irn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr); 1614 ierr = MPI_Isend(jcn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr); 1615 ierr = MPI_Isend(val,count,MPIU_SCALAR, 0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr); 1616 irn += count; 1617 jcn += count; 1618 val += count; 1619 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1620 remain -= count; 1621 } 1622 } 1623 } else { 1624 nreqs = 0; 1625 if (mumps->is_omp_master) { 1626 val = mumps->val + mumps->recvcount[0]; 1627 for (i=1; i<osize; i++) { /* Remote communication only since self data is already in place */ 1628 count = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX); 1629 remain = mumps->recvcount[i] - count; 1630 while (count>0) { 1631 ierr = MPI_Irecv(val,count,MPIU_SCALAR,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr); 1632 val += count; 1633 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1634 remain -= count; 1635 } 1636 } 1637 } else { 1638 val = mumps->val; 1639 count = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX); 1640 remain = mumps->nnz - count; 1641 while (count>0) { 1642 ierr = MPI_Isend(val,count,MPIU_SCALAR,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr); 1643 val += count; 1644 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1645 remain -= count; 1646 } 1647 } 1648 } 1649 ierr = MPI_Waitall(nreqs,mumps->reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 1650 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1651 } 1652 PetscFunctionReturn(0); 1653 } 1654 1655 PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info) 1656 { 1657 Mat_MUMPS *mumps =(Mat_MUMPS*)(F)->data; 1658 PetscErrorCode ierr; 1659 PetscBool isMPIAIJ; 1660 1661 PetscFunctionBegin; 1662 if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) { 1663 if (mumps->id.INFOG(1) == -6) { 
1664 ierr = PetscInfo2(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1665 } 1666 ierr = PetscInfo2(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1667 PetscFunctionReturn(0); 1668 } 1669 1670 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps);CHKERRQ(ierr); 1671 ierr = MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);CHKERRQ(ierr); 1672 1673 /* numerical factorization phase */ 1674 /*-------------------------------*/ 1675 mumps->id.job = JOB_FACTNUMERIC; 1676 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1677 if (!mumps->myid) { 1678 mumps->id.a = (MumpsScalar*)mumps->val; 1679 } 1680 } else { 1681 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1682 } 1683 PetscMUMPS_c(mumps); 1684 if (mumps->id.INFOG(1) < 0) { 1685 if (A->erroriffailure) { 1686 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d",mumps->id.INFOG(1),mumps->id.INFO(2)); 1687 } else { 1688 if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */ 1689 ierr = PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1690 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1691 } else if (mumps->id.INFOG(1) == -13) { 1692 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1693 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1694 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 1695 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray \n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1696 
F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1697 } else { 1698 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1699 F->factorerrortype = MAT_FACTOR_OTHER; 1700 } 1701 } 1702 } 1703 if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d",mumps->id.INFOG(16)); 1704 1705 F->assembled = PETSC_TRUE; 1706 1707 if (F->schur) { /* reset Schur status to unfactored */ 1708 #if defined(PETSC_HAVE_CUDA) 1709 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 1710 #endif 1711 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1712 mumps->id.ICNTL(19) = 2; 1713 ierr = MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);CHKERRQ(ierr); 1714 } 1715 ierr = MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);CHKERRQ(ierr); 1716 } 1717 1718 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 1719 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 1720 1721 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 1722 if (mumps->petsc_size > 1) { 1723 PetscInt lsol_loc; 1724 PetscScalar *sol_loc; 1725 1726 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr); 1727 1728 /* distributed solution; Create x_seq=sol_loc for repeated use */ 1729 if (mumps->x_seq) { 1730 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 1731 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 1732 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 1733 } 1734 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1735 ierr = PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);CHKERRQ(ierr); 1736 mumps->id.lsol_loc = lsol_loc; 1737 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1738 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr); 1739 } 1740 ierr = 
PetscLogFlops(mumps->id.RINFO(2));CHKERRQ(ierr); 1741 PetscFunctionReturn(0); 1742 } 1743 1744 /* Sets MUMPS options from the options database */ 1745 PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A) 1746 { 1747 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1748 PetscErrorCode ierr; 1749 PetscMUMPSInt icntl=0; 1750 PetscInt info[80],i,ninfo=80; 1751 PetscBool flg=PETSC_FALSE; 1752 1753 PetscFunctionBegin; 1754 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 1755 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr); 1756 if (flg) mumps->id.ICNTL(1) = icntl; 1757 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr); 1758 if (flg) mumps->id.ICNTL(2) = icntl; 1759 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr); 1760 if (flg) mumps->id.ICNTL(3) = icntl; 1761 1762 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr); 1763 if (flg) mumps->id.ICNTL(4) = icntl; 1764 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 1765 1766 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr); 1767 if (flg) mumps->id.ICNTL(6) = icntl; 1768 1769 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis. 
0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr); 1770 if (flg) { 1771 if (icntl == 1 || icntl < 0 || icntl > 7) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 1772 mumps->id.ICNTL(7) = icntl; 1773 } 1774 1775 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr); 1776 /* ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL);CHKERRQ(ierr); handled by MatSolveTranspose_MUMPS() */ 1777 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr); 1778 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr); 1779 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr); 1780 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr); 1781 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr); 1782 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr); 1783 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 1784 ierr = MatDestroy(&F->schur);CHKERRQ(ierr); 1785 ierr = 
MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 1786 } 1787 1788 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 1789 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 1790 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 1791 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 1792 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 1793 In short, we could not use distributed RHS with MPICH until v4.0b1. 1794 */ 1795 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101)) 1796 mumps->ICNTL20 = 0; /* Centralized dense RHS*/ 1797 #else 1798 mumps->ICNTL20 = 10; /* Distributed dense RHS*/ 1799 #endif 1800 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_20","ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides","None",mumps->ICNTL20,&mumps->ICNTL20,&flg);CHKERRQ(ierr); 1801 if (flg && mumps->ICNTL20 != 10 && mumps->ICNTL20 != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. 
Allowed values are 0, 10",(int)mumps->ICNTL20); 1802 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0) 1803 if (flg && mumps->ICNTL20 == 10) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=10 is not supported before MUMPS-5.3.0"); 1804 #endif 1805 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL);CHKERRQ(ierr); we only use distributed solution vector */ 1806 1807 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr); 1808 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr); 1809 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr); 1810 if (mumps->id.ICNTL(24)) { 1811 mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ 1812 } 1813 1814 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr); 1815 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr); 1816 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_27","ICNTL(27): controls the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr); 1817 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) 
ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr); 1818 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr); 1819 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */ 1820 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr); 1821 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr); -- not supported by PETSc API */ 1822 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr); 1823 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr); 1824 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);CHKERRQ(ierr); 1825 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);CHKERRQ(ierr); 1826 1827 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr); 1828 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr); 1829 ierr = 
PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr); 1830 ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr); 1831 ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr); 1832 ierr = PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);CHKERRQ(ierr); 1833 1834 ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL);CHKERRQ(ierr); 1835 1836 ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr); 1837 if (ninfo) { 1838 if (PetscUnlikely(ninfo > 80)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %" PetscInt_FMT " must <= 80",ninfo); 1839 ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr); 1840 mumps->ninfo = ninfo; 1841 for (i=0; i<ninfo; i++) { 1842 if (PetscUnlikely(info[i] < 0 || info[i]>80)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %" PetscInt_FMT " must between 1 and 80",ninfo); 1843 else mumps->info[i] = info[i]; 1844 } 1845 } 1846 1847 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1848 PetscFunctionReturn(0); 1849 } 1850 1851 PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps) 1852 { 1853 PetscErrorCode ierr; 1854 PetscInt nthreads=0; 1855 1856 PetscFunctionBegin; 1857 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1858 ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRMPI(ierr); 1859 ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRMPI(ierr);/* "if (!myid)" still works even if mumps_comm is different */ 1860 1861 ierr = 
PetscOptionsHasName(NULL,((PetscObject)A)->prefix,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr); 1862 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1863 ierr = PetscOptionsGetInt(NULL,((PetscObject)A)->prefix,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr); 1864 if (mumps->use_petsc_omp_support) { 1865 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1866 ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr); 1867 ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr); 1868 #else 1869 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",((PetscObject)A)->prefix?((PetscObject)A)->prefix:""); 1870 #endif 1871 } else { 1872 mumps->omp_comm = PETSC_COMM_SELF; 1873 mumps->mumps_comm = mumps->petsc_comm; 1874 mumps->is_omp_master = PETSC_TRUE; 1875 } 1876 ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRMPI(ierr); 1877 mumps->reqs = NULL; 1878 mumps->tag = 0; 1879 1880 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. 
*/ 1881 if (mumps->mumps_comm != MPI_COMM_NULL) { 1882 ierr = PetscCommGetComm(PetscObjectComm((PetscObject)A),&mumps->mumps_comm);CHKERRQ(ierr); 1883 } 1884 1885 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 1886 mumps->id.job = JOB_INIT; 1887 mumps->id.par = 1; /* host participates factorizaton and solve */ 1888 mumps->id.sym = mumps->sym; 1889 1890 PetscMUMPS_c(mumps); 1891 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in PetscInitializeMUMPS: INFOG(1)=%d",mumps->id.INFOG(1)); 1892 1893 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 1894 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 1895 */ 1896 ierr = MPI_Bcast(mumps->id.icntl,40,MPI_INT, 0,mumps->omp_comm);CHKERRMPI(ierr); 1897 ierr = MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);CHKERRMPI(ierr); 1898 1899 mumps->scat_rhs = NULL; 1900 mumps->scat_sol = NULL; 1901 1902 /* set PETSc-MUMPS default options - override MUMPS default */ 1903 mumps->id.ICNTL(3) = 0; 1904 mumps->id.ICNTL(4) = 0; 1905 if (mumps->petsc_size == 1) { 1906 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 1907 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 1908 } else { 1909 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 1910 mumps->id.ICNTL(21) = 1; /* distributed solution */ 1911 } 1912 1913 /* schur */ 1914 mumps->id.size_schur = 0; 1915 mumps->id.listvar_schur = NULL; 1916 mumps->id.schur = NULL; 1917 mumps->sizeredrhs = 0; 1918 mumps->schur_sol = NULL; 1919 mumps->schur_sizesol = 0; 1920 PetscFunctionReturn(0); 1921 } 1922 1923 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps) 1924 { 1925 PetscErrorCode ierr; 1926 1927 PetscFunctionBegin; 1928 if (mumps->id.INFOG(1) < 0) { 1929 if (A->erroriffailure) { 1930 
SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d",mumps->id.INFOG(1)); 1931 } else { 1932 if (mumps->id.INFOG(1) == -6) { 1933 ierr = PetscInfo2(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1934 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 1935 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 1936 ierr = PetscInfo2(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1937 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1938 } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) { 1939 ierr = PetscInfo(F,"Empty matrix\n");CHKERRQ(ierr); 1940 } else { 1941 ierr = PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1942 F->factorerrortype = MAT_FACTOR_OTHER; 1943 } 1944 } 1945 } 1946 PetscFunctionReturn(0); 1947 } 1948 1949 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1950 { 1951 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1952 PetscErrorCode ierr; 1953 Vec b; 1954 const PetscInt M = A->rmap->N; 1955 1956 PetscFunctionBegin; 1957 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 1958 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 1959 PetscFunctionReturn(0); 1960 } 1961 1962 /* Set MUMPS options from the options database */ 1963 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1964 1965 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr); 1966 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1967 1968 /* analysis phase */ 1969 /*----------------*/ 1970 mumps->id.job = JOB_FACTSYMBOLIC; 1971 mumps->id.n = M; 1972 switch (mumps->id.ICNTL(18)) { 1973 case 0: /* centralized assembled matrix input */ 1974 if (!mumps->myid) { 1975 mumps->id.nnz = mumps->nnz; 1976 
mumps->id.irn = mumps->irn; 1977 mumps->id.jcn = mumps->jcn; 1978 if (mumps->id.ICNTL(6)>1) mumps->id.a = (MumpsScalar*)mumps->val; 1979 if (r) { 1980 mumps->id.ICNTL(7) = 1; 1981 if (!mumps->myid) { 1982 const PetscInt *idx; 1983 PetscInt i; 1984 1985 ierr = PetscMalloc1(M,&mumps->id.perm_in);CHKERRQ(ierr); 1986 ierr = ISGetIndices(r,&idx);CHKERRQ(ierr); 1987 for (i=0; i<M; i++) {ierr = PetscMUMPSIntCast(idx[i]+1,&(mumps->id.perm_in[i]));CHKERRQ(ierr);} /* perm_in[]: start from 1, not 0! */ 1988 ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr); 1989 } 1990 } 1991 } 1992 break; 1993 case 3: /* distributed assembled matrix input (size>1) */ 1994 mumps->id.nnz_loc = mumps->nnz; 1995 mumps->id.irn_loc = mumps->irn; 1996 mumps->id.jcn_loc = mumps->jcn; 1997 if (mumps->id.ICNTL(6)>1) mumps->id.a_loc = (MumpsScalar*)mumps->val; 1998 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1999 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 2000 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 2001 ierr = VecDestroy(&b);CHKERRQ(ierr); 2002 } 2003 break; 2004 } 2005 PetscMUMPS_c(mumps); 2006 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 2007 2008 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2009 F->ops->solve = MatSolve_MUMPS; 2010 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2011 F->ops->matsolve = MatMatSolve_MUMPS; 2012 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2013 2014 mumps->matstruc = SAME_NONZERO_PATTERN; 2015 PetscFunctionReturn(0); 2016 } 2017 2018 /* Note the Petsc r and c permutations are ignored */ 2019 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 2020 { 2021 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 2022 PetscErrorCode ierr; 2023 Vec b; 2024 const PetscInt M = A->rmap->N; 2025 2026 PetscFunctionBegin; 2027 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2028 /* F is assembled by a 
previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2029 PetscFunctionReturn(0); 2030 } 2031 2032 /* Set MUMPS options from the options database */ 2033 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 2034 2035 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr); 2036 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 2037 2038 /* analysis phase */ 2039 /*----------------*/ 2040 mumps->id.job = JOB_FACTSYMBOLIC; 2041 mumps->id.n = M; 2042 switch (mumps->id.ICNTL(18)) { 2043 case 0: /* centralized assembled matrix input */ 2044 if (!mumps->myid) { 2045 mumps->id.nnz = mumps->nnz; 2046 mumps->id.irn = mumps->irn; 2047 mumps->id.jcn = mumps->jcn; 2048 if (mumps->id.ICNTL(6)>1) { 2049 mumps->id.a = (MumpsScalar*)mumps->val; 2050 } 2051 } 2052 break; 2053 case 3: /* distributed assembled matrix input (size>1) */ 2054 mumps->id.nnz_loc = mumps->nnz; 2055 mumps->id.irn_loc = mumps->irn; 2056 mumps->id.jcn_loc = mumps->jcn; 2057 if (mumps->id.ICNTL(6)>1) { 2058 mumps->id.a_loc = (MumpsScalar*)mumps->val; 2059 } 2060 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2061 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 2062 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 2063 ierr = VecDestroy(&b);CHKERRQ(ierr); 2064 } 2065 break; 2066 } 2067 PetscMUMPS_c(mumps); 2068 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 2069 2070 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2071 F->ops->solve = MatSolve_MUMPS; 2072 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2073 2074 mumps->matstruc = SAME_NONZERO_PATTERN; 2075 PetscFunctionReturn(0); 2076 } 2077 2078 /* Note the Petsc r permutation and factor info are ignored */ 2079 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info) 2080 { 2081 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 2082 PetscErrorCode ierr; 2083 Vec b; 2084 const PetscInt M = A->rmap->N; 2085 2086 PetscFunctionBegin; 2087 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2088 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2089 PetscFunctionReturn(0); 2090 } 2091 2092 /* Set MUMPS options from the options database */ 2093 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 2094 2095 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr); 2096 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 2097 2098 /* analysis phase */ 2099 /*----------------*/ 2100 mumps->id.job = JOB_FACTSYMBOLIC; 2101 mumps->id.n = M; 2102 switch (mumps->id.ICNTL(18)) { 2103 case 0: /* centralized assembled matrix input */ 2104 if (!mumps->myid) { 2105 mumps->id.nnz = mumps->nnz; 2106 mumps->id.irn = mumps->irn; 2107 mumps->id.jcn = mumps->jcn; 2108 if (mumps->id.ICNTL(6)>1) { 2109 mumps->id.a = (MumpsScalar*)mumps->val; 2110 } 2111 } 2112 break; 2113 case 3: /* distributed assembled matrix input (size>1) */ 2114 mumps->id.nnz_loc = mumps->nnz; 2115 mumps->id.irn_loc = mumps->irn; 2116 mumps->id.jcn_loc = 
mumps->jcn; 2117 if (mumps->id.ICNTL(6)>1) { 2118 mumps->id.a_loc = (MumpsScalar*)mumps->val; 2119 } 2120 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2121 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 2122 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 2123 ierr = VecDestroy(&b);CHKERRQ(ierr); 2124 } 2125 break; 2126 } 2127 PetscMUMPS_c(mumps); 2128 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 2129 2130 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2131 F->ops->solve = MatSolve_MUMPS; 2132 F->ops->solvetranspose = MatSolve_MUMPS; 2133 F->ops->matsolve = MatMatSolve_MUMPS; 2134 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2135 #if defined(PETSC_USE_COMPLEX) 2136 F->ops->getinertia = NULL; 2137 #else 2138 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2139 #endif 2140 2141 mumps->matstruc = SAME_NONZERO_PATTERN; 2142 PetscFunctionReturn(0); 2143 } 2144 2145 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) 2146 { 2147 PetscErrorCode ierr; 2148 PetscBool iascii; 2149 PetscViewerFormat format; 2150 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 2151 2152 PetscFunctionBegin; 2153 /* check if matrix is mumps type */ 2154 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0); 2155 2156 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 2157 if (iascii) { 2158 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 2159 if (format == PETSC_VIEWER_ASCII_INFO) { 2160 ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr); 2161 ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d\n",mumps->id.sym);CHKERRQ(ierr); 2162 ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d\n",mumps->id.par);CHKERRQ(ierr); 2163 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d\n",mumps->id.ICNTL(1));CHKERRQ(ierr); 2164 ierr = 
PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d\n",mumps->id.ICNTL(2));CHKERRQ(ierr); 2165 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d\n",mumps->id.ICNTL(3));CHKERRQ(ierr); 2166 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d\n",mumps->id.ICNTL(4));CHKERRQ(ierr); 2167 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d\n",mumps->id.ICNTL(5));CHKERRQ(ierr); 2168 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d\n",mumps->id.ICNTL(6));CHKERRQ(ierr); 2169 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d\n",mumps->id.ICNTL(7));CHKERRQ(ierr); 2170 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d\n",mumps->id.ICNTL(8));CHKERRQ(ierr); 2171 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d\n",mumps->id.ICNTL(10));CHKERRQ(ierr); 2172 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d\n",mumps->id.ICNTL(11));CHKERRQ(ierr); 2173 if (mumps->id.ICNTL(11)>0) { 2174 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr); 2175 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr); 2176 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6));CHKERRQ(ierr); 2177 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr); 2178 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr); 2179 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr); 2180 } 2181 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d\n",mumps->id.ICNTL(12));CHKERRQ(ierr); 2182 ierr = 
PetscViewerASCIIPrintf(viewer," ICNTL(13) (sequential factorization of the root node): %d\n",mumps->id.ICNTL(13));CHKERRQ(ierr); 2183 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d\n",mumps->id.ICNTL(14));CHKERRQ(ierr); 2184 /* ICNTL(15-17) not used */ 2185 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d\n",mumps->id.ICNTL(18));CHKERRQ(ierr); 2186 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d\n",mumps->id.ICNTL(19));CHKERRQ(ierr); 2187 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(20) (RHS sparse pattern): %d\n",mumps->id.ICNTL(20));CHKERRQ(ierr); 2188 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d\n",mumps->id.ICNTL(21));CHKERRQ(ierr); 2189 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d\n",mumps->id.ICNTL(22));CHKERRQ(ierr); 2190 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory can be allocated locally):%d\n",mumps->id.ICNTL(23));CHKERRQ(ierr); 2191 2192 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d\n",mumps->id.ICNTL(24));CHKERRQ(ierr); 2193 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d\n",mumps->id.ICNTL(25));CHKERRQ(ierr); 2194 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for RHS or solution): %d\n",mumps->id.ICNTL(26));CHKERRQ(ierr); 2195 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(27) (blocking size for multiple RHS): %d\n",mumps->id.ICNTL(27));CHKERRQ(ierr); 2196 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d\n",mumps->id.ICNTL(28));CHKERRQ(ierr); 2197 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d\n",mumps->id.ICNTL(29));CHKERRQ(ierr); 2198 2199 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d\n",mumps->id.ICNTL(30));CHKERRQ(ierr); 2200 ierr = 
PetscViewerASCIIPrintf(viewer," ICNTL(31) (factors is discarded in the solve phase): %d\n",mumps->id.ICNTL(31));CHKERRQ(ierr); 2201 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d\n",mumps->id.ICNTL(33));CHKERRQ(ierr); 2202 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d\n",mumps->id.ICNTL(35));CHKERRQ(ierr); 2203 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(36) (choice of BLR factorization variant): %d\n",mumps->id.ICNTL(36));CHKERRQ(ierr); 2204 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(38) (estimated compression rate of LU factors): %d\n",mumps->id.ICNTL(38));CHKERRQ(ierr); 2205 2206 ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1));CHKERRQ(ierr); 2207 ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr); 2208 ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3));CHKERRQ(ierr); 2209 ierr = PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4));CHKERRQ(ierr); 2210 ierr = PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5));CHKERRQ(ierr); 2211 ierr = PetscViewerASCIIPrintf(viewer," CNTL(7) (dropping parameter for BLR): %g \n",mumps->id.CNTL(7));CHKERRQ(ierr); 2212 2213 /* information local to each processor */ 2214 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr); 2215 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 2216 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr); 2217 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2218 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr); 2219 ierr = 
PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr); 2220 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2221 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr); 2222 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr); 2223 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2224 2225 ierr = PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr); 2226 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr); 2227 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2228 2229 ierr = PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr); 2230 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr); 2231 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2232 2233 ierr = PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr); 2234 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr); 2235 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2236 2237 if (mumps->ninfo && mumps->ninfo <= 80) { 2238 PetscInt i; 2239 for (i=0; i<mumps->ninfo; i++) { 2240 ierr = PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "): \n",mumps->info[i]);CHKERRQ(ierr); 2241 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(mumps->info[i]));CHKERRQ(ierr); 2242 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2243 } 2244 } 2245 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 2246 2247 if (!mumps->myid) { /* information from the host */ 2248 ierr = PetscViewerASCIIPrintf(viewer," 
RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr); 2249 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr); 2250 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr); 2251 ierr = PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr); 2252 2253 ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(3));CHKERRQ(ierr); 2254 ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(4));CHKERRQ(ierr); 2255 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d\n",mumps->id.INFOG(5));CHKERRQ(ierr); 2256 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d\n",mumps->id.INFOG(6));CHKERRQ(ierr); 2257 ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively used after analysis): %d\n",mumps->id.INFOG(7));CHKERRQ(ierr); 2258 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n",mumps->id.INFOG(8));CHKERRQ(ierr); 2259 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n",mumps->id.INFOG(9));CHKERRQ(ierr); 2260 ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d\n",mumps->id.INFOG(10));CHKERRQ(ierr); 2261 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after 
factorization): %d\n",mumps->id.INFOG(11));CHKERRQ(ierr); 2262 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d\n",mumps->id.INFOG(12));CHKERRQ(ierr); 2263 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d\n",mumps->id.INFOG(13));CHKERRQ(ierr); 2264 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d\n",mumps->id.INFOG(14));CHKERRQ(ierr); 2265 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d\n",mumps->id.INFOG(15));CHKERRQ(ierr); 2266 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n",mumps->id.INFOG(16));CHKERRQ(ierr); 2267 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n",mumps->id.INFOG(17));CHKERRQ(ierr); 2268 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n",mumps->id.INFOG(18));CHKERRQ(ierr); 2269 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n",mumps->id.INFOG(19));CHKERRQ(ierr); 2270 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d\n",mumps->id.INFOG(20));CHKERRQ(ierr); 2271 ierr = PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n",mumps->id.INFOG(21));CHKERRQ(ierr); 2272 ierr = PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n",mumps->id.INFOG(22));CHKERRQ(ierr); 2273 ierr = 
PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n",mumps->id.INFOG(23));CHKERRQ(ierr); 2274 ierr = PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n",mumps->id.INFOG(24));CHKERRQ(ierr); 2275 ierr = PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n",mumps->id.INFOG(25));CHKERRQ(ierr); 2276 ierr = PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));CHKERRQ(ierr); 2277 ierr = PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));CHKERRQ(ierr); 2278 ierr = PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));CHKERRQ(ierr); 2279 ierr = PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));CHKERRQ(ierr); 2280 ierr = PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));CHKERRQ(ierr); 2281 ierr = PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));CHKERRQ(ierr); 2282 ierr = PetscViewerASCIIPrintf(viewer," INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));CHKERRQ(ierr); 2283 ierr = PetscViewerASCIIPrintf(viewer," INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(36));CHKERRQ(ierr); 2284 ierr = PetscViewerASCIIPrintf(viewer," INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all 
processors): %d\n",mumps->id.INFOG(37));CHKERRQ(ierr); 2285 ierr = PetscViewerASCIIPrintf(viewer," INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(38));CHKERRQ(ierr); 2286 ierr = PetscViewerASCIIPrintf(viewer," INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n",mumps->id.INFOG(39));CHKERRQ(ierr); 2287 } 2288 } 2289 } 2290 PetscFunctionReturn(0); 2291 } 2292 2293 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info) 2294 { 2295 Mat_MUMPS *mumps =(Mat_MUMPS*)A->data; 2296 2297 PetscFunctionBegin; 2298 info->block_size = 1.0; 2299 info->nz_allocated = mumps->id.INFOG(20); 2300 info->nz_used = mumps->id.INFOG(20); 2301 info->nz_unneeded = 0.0; 2302 info->assemblies = 0.0; 2303 info->mallocs = 0.0; 2304 info->memory = 0.0; 2305 info->fill_ratio_given = 0; 2306 info->fill_ratio_needed = 0; 2307 info->factor_mallocs = 0; 2308 PetscFunctionReturn(0); 2309 } 2310 2311 /* -------------------------------------------------------------------------------------------*/ 2312 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 2313 { 2314 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2315 const PetscScalar *arr; 2316 const PetscInt *idxs; 2317 PetscInt size,i; 2318 PetscErrorCode ierr; 2319 2320 PetscFunctionBegin; 2321 ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr); 2322 if (mumps->petsc_size > 1) { 2323 PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */ 2324 2325 ls = mumps->myid ? (size ? 
PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 2326 ierr = MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);CHKERRMPI(ierr); 2327 if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc"); 2328 } 2329 2330 /* Schur complement matrix */ 2331 ierr = MatDestroy(&F->schur);CHKERRQ(ierr); 2332 ierr = MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur);CHKERRQ(ierr); 2333 ierr = MatDenseGetArrayRead(F->schur,&arr);CHKERRQ(ierr); 2334 mumps->id.schur = (MumpsScalar*)arr; 2335 mumps->id.size_schur = size; 2336 mumps->id.schur_lld = size; 2337 ierr = MatDenseRestoreArrayRead(F->schur,&arr);CHKERRQ(ierr); 2338 if (mumps->sym == 1) { 2339 ierr = MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 2340 } 2341 2342 /* MUMPS expects Fortran style indices */ 2343 ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr); 2344 ierr = PetscMalloc1(size,&mumps->id.listvar_schur);CHKERRQ(ierr); 2345 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 2346 for (i=0; i<size; i++) {ierr = PetscMUMPSIntCast(idxs[i]+1,&(mumps->id.listvar_schur[i]));CHKERRQ(ierr);} 2347 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 2348 if (mumps->petsc_size > 1) { 2349 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2350 } else { 2351 if (F->factortype == MAT_FACTOR_LU) { 2352 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2353 } else { 2354 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2355 } 2356 } 2357 /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 2358 mumps->id.ICNTL(26) = -1; 2359 PetscFunctionReturn(0); 2360 } 2361 2362 /* -------------------------------------------------------------------------------------------*/ 2363 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S) 2364 { 2365 Mat St; 2366 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2367 
PetscScalar *array; 2368 #if defined(PETSC_USE_COMPLEX) 2369 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 2370 #endif 2371 PetscErrorCode ierr; 2372 2373 PetscFunctionBegin; 2374 if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 2375 ierr = MatCreate(PETSC_COMM_SELF,&St);CHKERRQ(ierr); 2376 ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);CHKERRQ(ierr); 2377 ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr); 2378 ierr = MatSetUp(St);CHKERRQ(ierr); 2379 ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr); 2380 if (!mumps->sym) { /* MUMPS always return a full matrix */ 2381 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2382 PetscInt i,j,N=mumps->id.size_schur; 2383 for (i=0;i<N;i++) { 2384 for (j=0;j<N;j++) { 2385 #if !defined(PETSC_USE_COMPLEX) 2386 PetscScalar val = mumps->id.schur[i*N+j]; 2387 #else 2388 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2389 #endif 2390 array[j*N+i] = val; 2391 } 2392 } 2393 } else { /* stored by columns */ 2394 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 2395 } 2396 } else { /* either full or lower-triangular (not packed) */ 2397 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2398 PetscInt i,j,N=mumps->id.size_schur; 2399 for (i=0;i<N;i++) { 2400 for (j=i;j<N;j++) { 2401 #if !defined(PETSC_USE_COMPLEX) 2402 PetscScalar val = mumps->id.schur[i*N+j]; 2403 #else 2404 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2405 #endif 2406 array[i*N+j] = val; 2407 array[j*N+i] = val; 2408 } 2409 } 2410 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2411 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 2412 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2413 PetscInt 
i,j,N=mumps->id.size_schur; 2414 for (i=0;i<N;i++) { 2415 for (j=0;j<i+1;j++) { 2416 #if !defined(PETSC_USE_COMPLEX) 2417 PetscScalar val = mumps->id.schur[i*N+j]; 2418 #else 2419 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2420 #endif 2421 array[i*N+j] = val; 2422 array[j*N+i] = val; 2423 } 2424 } 2425 } 2426 } 2427 ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr); 2428 *S = St; 2429 PetscFunctionReturn(0); 2430 } 2431 2432 /* -------------------------------------------------------------------------------------------*/ 2433 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival) 2434 { 2435 PetscErrorCode ierr; 2436 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2437 2438 PetscFunctionBegin; 2439 ierr = PetscMUMPSIntCast(ival,&mumps->id.ICNTL(icntl));CHKERRQ(ierr); 2440 PetscFunctionReturn(0); 2441 } 2442 2443 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival) 2444 { 2445 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2446 2447 PetscFunctionBegin; 2448 *ival = mumps->id.ICNTL(icntl); 2449 PetscFunctionReturn(0); 2450 } 2451 2452 /*@ 2453 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2454 2455 Logically Collective on Mat 2456 2457 Input Parameters: 2458 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2459 . icntl - index of MUMPS parameter array ICNTL() 2460 - ival - value of MUMPS ICNTL(icntl) 2461 2462 Options Database: 2463 . -mat_mumps_icntl_<icntl> <ival> 2464 2465 Level: beginner 2466 2467 References: 2468 . 
MUMPS Users' Guide 2469 2470 .seealso: MatGetFactor(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2471 @*/ 2472 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival) 2473 { 2474 PetscErrorCode ierr; 2475 2476 PetscFunctionBegin; 2477 PetscValidType(F,1); 2478 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2479 PetscValidLogicalCollectiveInt(F,icntl,2); 2480 PetscValidLogicalCollectiveInt(F,ival,3); 2481 ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr); 2482 PetscFunctionReturn(0); 2483 } 2484 2485 /*@ 2486 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2487 2488 Logically Collective on Mat 2489 2490 Input Parameters: 2491 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2492 - icntl - index of MUMPS parameter array ICNTL() 2493 2494 Output Parameter: 2495 . ival - value of MUMPS ICNTL(icntl) 2496 2497 Level: beginner 2498 2499 References: 2500 . 
MUMPS Users' Guide 2501 2502 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2503 @*/ 2504 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival) 2505 { 2506 PetscErrorCode ierr; 2507 2508 PetscFunctionBegin; 2509 PetscValidType(F,1); 2510 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2511 PetscValidLogicalCollectiveInt(F,icntl,2); 2512 PetscValidIntPointer(ival,3); 2513 ierr = PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2514 PetscFunctionReturn(0); 2515 } 2516 2517 /* -------------------------------------------------------------------------------------------*/ 2518 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val) 2519 { 2520 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2521 2522 PetscFunctionBegin; 2523 mumps->id.CNTL(icntl) = val; 2524 PetscFunctionReturn(0); 2525 } 2526 2527 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val) 2528 { 2529 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2530 2531 PetscFunctionBegin; 2532 *val = mumps->id.CNTL(icntl); 2533 PetscFunctionReturn(0); 2534 } 2535 2536 /*@ 2537 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2538 2539 Logically Collective on Mat 2540 2541 Input Parameters: 2542 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2543 . icntl - index of MUMPS parameter array CNTL() 2544 - val - value of MUMPS CNTL(icntl) 2545 2546 Options Database: 2547 . -mat_mumps_cntl_<icntl> <val> 2548 2549 Level: beginner 2550 2551 References: 2552 . 
MUMPS Users' Guide 2553 2554 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2555 @*/ 2556 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val) 2557 { 2558 PetscErrorCode ierr; 2559 2560 PetscFunctionBegin; 2561 PetscValidType(F,1); 2562 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2563 PetscValidLogicalCollectiveInt(F,icntl,2); 2564 PetscValidLogicalCollectiveReal(F,val,3); 2565 ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr); 2566 PetscFunctionReturn(0); 2567 } 2568 2569 /*@ 2570 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2571 2572 Logically Collective on Mat 2573 2574 Input Parameters: 2575 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2576 - icntl - index of MUMPS parameter array CNTL() 2577 2578 Output Parameter: 2579 . val - value of MUMPS CNTL(icntl) 2580 2581 Level: beginner 2582 2583 References: 2584 . 
MUMPS Users' Guide 2585 2586 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2587 @*/ 2588 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val) 2589 { 2590 PetscErrorCode ierr; 2591 2592 PetscFunctionBegin; 2593 PetscValidType(F,1); 2594 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2595 PetscValidLogicalCollectiveInt(F,icntl,2); 2596 PetscValidRealPointer(val,3); 2597 ierr = PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2598 PetscFunctionReturn(0); 2599 } 2600 2601 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info) 2602 { 2603 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2604 2605 PetscFunctionBegin; 2606 *info = mumps->id.INFO(icntl); 2607 PetscFunctionReturn(0); 2608 } 2609 2610 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog) 2611 { 2612 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2613 2614 PetscFunctionBegin; 2615 *infog = mumps->id.INFOG(icntl); 2616 PetscFunctionReturn(0); 2617 } 2618 2619 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo) 2620 { 2621 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2622 2623 PetscFunctionBegin; 2624 *rinfo = mumps->id.RINFO(icntl); 2625 PetscFunctionReturn(0); 2626 } 2627 2628 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog) 2629 { 2630 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2631 2632 PetscFunctionBegin; 2633 *rinfog = mumps->id.RINFOG(icntl); 2634 PetscFunctionReturn(0); 2635 } 2636 2637 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS) 2638 { 2639 PetscErrorCode ierr; 2640 Mat Bt = NULL,Btseq = NULL; 2641 PetscBool flg; 2642 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2643 PetscScalar *aa; 2644 PetscInt spnr,*ia,*ja,M,nrhs; 2645 2646 PetscFunctionBegin; 2647 PetscValidPointer(spRHS,2); 2648 ierr 
= PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);CHKERRQ(ierr); 2649 if (flg) { 2650 ierr = MatTransposeGetMat(spRHS,&Bt);CHKERRQ(ierr); 2651 } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be type MATTRANSPOSEMAT matrix"); 2652 2653 ierr = MatMumpsSetIcntl(F,30,1);CHKERRQ(ierr); 2654 2655 if (mumps->petsc_size > 1) { 2656 Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data; 2657 Btseq = b->A; 2658 } else { 2659 Btseq = Bt; 2660 } 2661 2662 ierr = MatGetSize(spRHS,&M,&nrhs);CHKERRQ(ierr); 2663 mumps->id.nrhs = nrhs; 2664 mumps->id.lrhs = M; 2665 mumps->id.rhs = NULL; 2666 2667 if (!mumps->myid) { 2668 ierr = MatSeqAIJGetArray(Btseq,&aa);CHKERRQ(ierr); 2669 ierr = MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2670 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2671 ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr); 2672 mumps->id.rhs_sparse = (MumpsScalar*)aa; 2673 } else { 2674 mumps->id.irhs_ptr = NULL; 2675 mumps->id.irhs_sparse = NULL; 2676 mumps->id.nz_rhs = 0; 2677 mumps->id.rhs_sparse = NULL; 2678 } 2679 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2680 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2681 2682 /* solve phase */ 2683 /*-------------*/ 2684 mumps->id.job = JOB_SOLVE; 2685 PetscMUMPS_c(mumps); 2686 if (mumps->id.INFOG(1) < 0) 2687 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d",mumps->id.INFOG(1),mumps->id.INFO(2)); 2688 2689 if (!mumps->myid) { 2690 ierr = MatSeqAIJRestoreArray(Btseq,&aa);CHKERRQ(ierr); 2691 ierr = MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2692 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2693 } 2694 
PetscFunctionReturn(0); 2695 } 2696 2697 /*@ 2698 MatMumpsGetInverse - Get user-specified set of entries in inverse of A 2699 2700 Logically Collective on Mat 2701 2702 Input Parameters: 2703 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2704 - spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0] 2705 2706 Output Parameter: 2707 . spRHS - requested entries of inverse of A 2708 2709 Level: beginner 2710 2711 References: 2712 . MUMPS Users' Guide 2713 2714 .seealso: MatGetFactor(), MatCreateTranspose() 2715 @*/ 2716 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS) 2717 { 2718 PetscErrorCode ierr; 2719 2720 PetscFunctionBegin; 2721 PetscValidType(F,1); 2722 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2723 ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr); 2724 PetscFunctionReturn(0); 2725 } 2726 2727 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST) 2728 { 2729 PetscErrorCode ierr; 2730 Mat spRHS; 2731 2732 PetscFunctionBegin; 2733 ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr); 2734 ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr); 2735 ierr = MatDestroy(&spRHS);CHKERRQ(ierr); 2736 PetscFunctionReturn(0); 2737 } 2738 2739 /*@ 2740 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T 2741 2742 Logically Collective on Mat 2743 2744 Input Parameters: 2745 + F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface 2746 - spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0] 2747 2748 Output Parameter: 2749 . spRHST - requested entries of inverse of A^T 2750 2751 Level: beginner 2752 2753 References: 2754 . 
MUMPS Users' Guide 2755 2756 .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse() 2757 @*/ 2758 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST) 2759 { 2760 PetscErrorCode ierr; 2761 PetscBool flg; 2762 2763 PetscFunctionBegin; 2764 PetscValidType(F,1); 2765 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2766 ierr = PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 2767 if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix"); 2768 2769 ierr = PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));CHKERRQ(ierr); 2770 PetscFunctionReturn(0); 2771 } 2772 2773 /*@ 2774 MatMumpsGetInfo - Get MUMPS parameter INFO() 2775 2776 Logically Collective on Mat 2777 2778 Input Parameters: 2779 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2780 - icntl - index of MUMPS parameter array INFO() 2781 2782 Output Parameter: 2783 . ival - value of MUMPS INFO(icntl) 2784 2785 Level: beginner 2786 2787 References: 2788 . 
MUMPS Users' Guide 2789 2790 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2791 @*/ 2792 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival) 2793 { 2794 PetscErrorCode ierr; 2795 2796 PetscFunctionBegin; 2797 PetscValidType(F,1); 2798 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2799 PetscValidIntPointer(ival,3); 2800 ierr = PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2801 PetscFunctionReturn(0); 2802 } 2803 2804 /*@ 2805 MatMumpsGetInfog - Get MUMPS parameter INFOG() 2806 2807 Logically Collective on Mat 2808 2809 Input Parameters: 2810 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2811 - icntl - index of MUMPS parameter array INFOG() 2812 2813 Output Parameter: 2814 . ival - value of MUMPS INFOG(icntl) 2815 2816 Level: beginner 2817 2818 References: 2819 . 
MUMPS Users' Guide 2820 2821 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2822 @*/ 2823 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival) 2824 { 2825 PetscErrorCode ierr; 2826 2827 PetscFunctionBegin; 2828 PetscValidType(F,1); 2829 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2830 PetscValidIntPointer(ival,3); 2831 ierr = PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2832 PetscFunctionReturn(0); 2833 } 2834 2835 /*@ 2836 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 2837 2838 Logically Collective on Mat 2839 2840 Input Parameters: 2841 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2842 - icntl - index of MUMPS parameter array RINFO() 2843 2844 Output Parameter: 2845 . val - value of MUMPS RINFO(icntl) 2846 2847 Level: beginner 2848 2849 References: 2850 . 
MUMPS Users' Guide 2851 2852 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfog() 2853 @*/ 2854 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val) 2855 { 2856 PetscErrorCode ierr; 2857 2858 PetscFunctionBegin; 2859 PetscValidType(F,1); 2860 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2861 PetscValidRealPointer(val,3); 2862 ierr = PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2863 PetscFunctionReturn(0); 2864 } 2865 2866 /*@ 2867 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 2868 2869 Logically Collective on Mat 2870 2871 Input Parameters: 2872 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2873 - icntl - index of MUMPS parameter array RINFOG() 2874 2875 Output Parameter: 2876 . val - value of MUMPS RINFOG(icntl) 2877 2878 Level: beginner 2879 2880 References: 2881 . MUMPS Users' Guide 2882 2883 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo() 2884 @*/ 2885 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val) 2886 { 2887 PetscErrorCode ierr; 2888 2889 PetscFunctionBegin; 2890 PetscValidType(F,1); 2891 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2892 PetscValidRealPointer(val,3); 2893 ierr = PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2894 PetscFunctionReturn(0); 2895 } 2896 2897 /*MC 2898 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 2899 distributed and sequential matrices via the external package MUMPS. 
2900 2901 Works with MATAIJ and MATSBAIJ matrices 2902 2903 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 2904 2905 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below. 2906 2907 Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver 2908 2909 Options Database Keys: 2910 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 2911 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 2912 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 2913 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 2914 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 2915 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 2916 Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 2917 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 2918 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 2919 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 2920 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 2921 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 2922 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 2923 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 2924 . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS 2925 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 2926 . 
-mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can be allocated per processor
.  -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
.  -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis
.  -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix
.  -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering
.  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
.  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
.  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
.  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
.  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
.  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
.  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
.  -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold
.  -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement
.  -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
.  -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
.  -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
.  -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
-  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
   Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.

   Level: beginner

   Notes:
   MUMPS Cholesky does not handle (complex) Hermitian matrices http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf so using it will error if the matrix is Hermitian.

   When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling
$          KSPGetPC(ksp,&pc);
$          PCFactorGetMatrix(pc,&mat);
$          MatMumpsGetInfo(mat,....);
$          MatMumpsGetInfog(mat,....); etc.
   Or you can run with -ksp_error_if_not_converged and the program will be stopped and the information printed in the error message.

   Using MUMPS with 64-bit integers
   MUMPS provides 64-bit integer support in two build modes:
   full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
   requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).

   selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
   MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
   columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
   integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.

   With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.

   Two modes to run MUMPS/PETSc with OpenMP
$     Set OMP_NUM_THREADS and run with fewer MPI ranks than cores.
For example, if you want to have 16 OpenMP
$     threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".

$     -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
$     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"

   To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
   (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc
   (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
   libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
   (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).

   If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
   processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
   size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
   are called slave ranks (or slaves). Only master ranks are seen by MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set
   by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs.
   In a multi-socket compute node, MPI rank mapping is an issue.
Still use the above example and suppose your compute node has two sockets, 2990 if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind 2991 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half 2992 cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 2993 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 2994 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 2995 examine the mapping result. 2996 2997 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts, 2998 for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc 2999 calls omp_set_num_threads(m) internally before calling MUMPS. 3000 3001 References: 3002 + 1. - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 3003 - 2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017. 

.seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCFactorGetMatrix()

M*/

/* Report the solver package name (MATSOLVERMUMPS) of a MUMPS factor */
static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERMUMPS;
  PetscFunctionReturn(0);
}

/* MatGetFactor for Seq and MPI AIJ matrices */
static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqAIJ;
  PetscMPIInt    size;

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  if (A->hermitian && !A->symmetric && ftype == MAT_FACTOR_CHOLESKY) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
#endif
  /* Create the factorization matrix */
  ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  /* Compose the MUMPS-specific query/control methods looked up by the public MatMumps* wrappers via PetscUseMethod() */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
    ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]);CHKERRQ(ierr);
    mumps->sym = 0; /* MUMPS SYM parameter: 0=unsymmetric, 1=SPD, 2=general symmetric (per MUMPS Users' Guide) */
  } else {
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
    B->factortype                  = MAT_FACTOR_CHOLESKY;
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
    ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY]);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    mumps->sym = 2;
#else
    if (A->spd_set && A->spd) mumps->sym = 1;
    else mumps->sym = 2;
#endif
  }

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
  PetscFunctionReturn(0);
}

/* MatGetFactor for Seq and MPI SBAIJ matrices */
static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqSBAIJ;
  PetscMPIInt    size;

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  if (A->hermitian && !A->symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
#endif
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr);
  if (isSeqSBAIJ) {
    mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
  } else {
    mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
  }

  B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
  B->ops->view                   = MatView_MUMPS;
  B->ops->getinfo                = MatGetInfo_MUMPS;

  /* Compose the MUMPS-specific query/control methods looked up by the public MatMumps* wrappers via PetscUseMethod() */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  B->factortype = MAT_FACTOR_CHOLESKY;
#if defined(PETSC_USE_COMPLEX)
  mumps->sym = 2;
#else
  if (A->spd_set && A->spd) mumps->sym = 1;
  else mumps->sym = 2;
#endif

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }
  ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY]);CHKERRQ(ierr);
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
  PetscFunctionReturn(0);
}

/* MatGetFactor for Seq and MPI BAIJ matrices; only LU is supported (Cholesky errors out below) */
static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqBAIJ;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
    mumps->sym = 0;
    ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  /* Compose the MUMPS-specific query/control methods looked up by the public MatMumps* wrappers via PetscUseMethod() */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
  PetscFunctionReturn(0);
}

/* MatGetFactor for Seq and MPI SELL matrices */
static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqSELL;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  /* NOTE(review): unlike the AIJ/BAIJ/SBAIJ factories above, MatMumpsGetInverse_C and
     MatMumpsGetInverseTranspose_C are not composed here -- presumably unimplemented for SELL; confirm */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
    else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
    mumps->sym = 0;
    ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
  PetscFunctionReturn(0);
}

/* Register the MUMPS factorizations with the MatSolverType registry for each supported matrix type.
   Note: BAIJ Cholesky is registered but MatGetFactor_baij_mumps rejects it with a pointer to SBAIJ/AIJ. */
PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
