1 2 /* 3 Provides an interface to the MUMPS sparse solver 4 */ 5 #include <petscpkg_version.h> 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 8 #include <../src/mat/impls/sell/mpi/mpisell.h> 9 10 EXTERN_C_BEGIN 11 #if defined(PETSC_USE_COMPLEX) 12 #if defined(PETSC_USE_REAL_SINGLE) 13 #include <cmumps_c.h> 14 #else 15 #include <zmumps_c.h> 16 #endif 17 #else 18 #if defined(PETSC_USE_REAL_SINGLE) 19 #include <smumps_c.h> 20 #else 21 #include <dmumps_c.h> 22 #endif 23 #endif 24 EXTERN_C_END 25 #define JOB_INIT -1 26 #define JOB_FACTSYMBOLIC 1 27 #define JOB_FACTNUMERIC 2 28 #define JOB_SOLVE 3 29 #define JOB_END -2 30 31 /* calls to MUMPS */ 32 #if defined(PETSC_USE_COMPLEX) 33 #if defined(PETSC_USE_REAL_SINGLE) 34 #define MUMPS_c cmumps_c 35 #else 36 #define MUMPS_c zmumps_c 37 #endif 38 #else 39 #if defined(PETSC_USE_REAL_SINGLE) 40 #define MUMPS_c smumps_c 41 #else 42 #define MUMPS_c dmumps_c 43 #endif 44 #endif 45 46 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for 47 number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the 48 naming convention in PetscMPIInt, PetscBLASInt etc. 49 */ 50 typedef MUMPS_INT PetscMUMPSInt; 51 52 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0) 53 #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */ 54 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 55 #endif 56 #else 57 #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */ 58 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 59 #endif 60 #endif 61 62 #define MPIU_MUMPSINT MPI_INT 63 #define PETSC_MUMPS_INT_MAX 2147483647 64 #define PETSC_MUMPS_INT_MIN -2147483648 65 66 /* Cast PetscInt to PetscMUMPSInt. 
   Usually there is no overflow since <a> is row/col indices or some small integers*/
PETSC_STATIC_INLINE PetscErrorCode PetscMUMPSIntCast(PetscInt a,PetscMUMPSInt *b)
{
  PetscFunctionBegin;
  /* Only 64-bit-index debug builds pay for the range check; in 32-bit-index builds the cast is always safe */
  if (PetscDefined(USE_64BIT_INDICES) && PetscUnlikelyDebug(a > PETSC_MUMPS_INT_MAX || a < PETSC_MUMPS_INT_MIN)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
  *b = (PetscMUMPSInt)(a);
  PetscFunctionReturn(0);
}

/* Put these utility routines here since they are only used in this file */
PETSC_STATIC_INLINE PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject,const char opt[],const char text[],const char man[],PetscMUMPSInt currentvalue,PetscMUMPSInt *value,PetscBool *set,PetscMUMPSInt lb,PetscMUMPSInt ub)
{
  PetscErrorCode ierr;
  PetscInt       myval;
  PetscBool      myset;

  PetscFunctionBegin;
  /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
  ierr = PetscOptionsInt_Private(PetscOptionsObject,opt,text,man,(PetscInt)currentvalue,&myval,&myset,lb,ub);CHKERRQ(ierr);
  /* only overwrite *value when the option was actually given on the command line / options database */
  if (myset) {ierr = PetscMUMPSIntCast(myval,value);CHKERRQ(ierr);}
  if (set) *set = myset;
  PetscFunctionReturn(0);
}
#define PetscOptionsMUMPSInt(a,b,c,d,e,f) PetscOptionsMUMPSInt_Private(PetscOptionsObject,a,b,c,d,e,f,PETSC_MUMPS_INT_MIN,PETSC_MUMPS_INT_MAX)

/* if using PETSc OpenMP support, we only call MUMPS on master ranks.
   Before/after the call, we change/restore CPUs the master ranks can run on */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
#define PetscMUMPS_c(mumps) \
  do { \
    if (mumps->use_petsc_omp_support) { \
      if (mumps->is_omp_master) { \
        ierr = PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl);CHKERRQ(ierr); \
        MUMPS_c(&mumps->id); \
        ierr = PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl);CHKERRQ(ierr); \
      } \
      ierr = PetscOmpCtrlBarrier(mumps->omp_ctrl);CHKERRQ(ierr); \
      /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \
         to processes, so we only Bcast info[1], an error code and leave others (since they do not have \
         an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \
         omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \
      */ \
      ierr = MPI_Bcast(mumps->id.infog, 40,MPIU_MUMPSINT, 0,mumps->omp_comm);CHKERRMPI(ierr);\
      ierr = MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL, 0,mumps->omp_comm);CHKERRMPI(ierr);\
      ierr = MPI_Bcast(mumps->id.info, 1, MPIU_MUMPSINT, 0,mumps->omp_comm);CHKERRMPI(ierr);\
    } else { \
      MUMPS_c(&mumps->id); \
    } \
  } while (0)
#else
/* no PETSc OpenMP support: every rank calls MUMPS directly */
#define PetscMUMPS_c(mumps) \
  do { MUMPS_c(&mumps->id); } while (0)
#endif

/* declare MumpsScalar */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MumpsScalar mumps_complex
#else
#define MumpsScalar mumps_double_complex
#endif
#else
#define MumpsScalar PetscScalar
#endif

/* macros s.t.
   indices match MUMPS documentation */
#define ICNTL(I)  icntl[(I)-1]
#define CNTL(I)   cntl[(I)-1]
#define INFOG(I)  infog[(I)-1]
#define INFO(I)   info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I)  rinfo[(I)-1]

typedef struct Mat_MUMPS Mat_MUMPS;
struct Mat_MUMPS {
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;
#else
  ZMUMPS_STRUC_C id;
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
#else
  DMUMPS_STRUC_C id;
#endif
#endif

  MatStructure   matstruc;
  PetscMPIInt    myid,petsc_size;
  PetscMUMPSInt  *irn,*jcn;          /* the (i,j,v) triplets passed to mumps. */
  PetscScalar    *val,*val_alloc;    /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
  PetscInt64     nnz;                /* number of nonzeros. The type is called selective 64-bit in mumps */
  PetscMUMPSInt  sym;
  MPI_Comm       mumps_comm;
  PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
  PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
  PetscMUMPSInt  lrhs_loc,nloc_rhs,*irhs_loc;
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  PetscInt       *rhs_nrow,max_nrhs;
  PetscMPIInt    *rhs_recvcounts,*rhs_disps;
  PetscScalar    *rhs_loc,*rhs_recvbuf;
#endif
  Vec            b_seq,x_seq;
  PetscInt       ninfo,*info;        /* which INFO to display */
  PetscInt       sizeredrhs;
  PetscScalar    *schur_sol;
  PetscInt       schur_sizesol;
  PetscMUMPSInt  *ia_alloc,*ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
  PetscInt64     cur_ilen,cur_jlen;   /* current len of ia_alloc[], ja_alloc[] */
  PetscErrorCode (*ConvertToTriples)(Mat,PetscInt,MatReuse,Mat_MUMPS*);

  /* stuff used by petsc/mumps OpenMP support*/
  PetscBool      use_petsc_omp_support;
  PetscOmpCtrl   omp_ctrl;           /* an OpenMP
                                        controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */
  MPI_Comm       petsc_comm,omp_comm; /* petsc_comm is petsc matrix's comm */
  PetscInt64     *recvcount;          /* a collection of nnz on omp_master */
  PetscMPIInt    tag,omp_comm_size;
  PetscBool      is_omp_master;       /* is this rank the master of omp_comm */
  MPI_Request    *reqs;
};

/* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
   Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
 */
static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps,PetscInt nrow,PetscInt *ia,PetscInt *ja,PetscMUMPSInt **ia_mumps,PetscMUMPSInt **ja_mumps,PetscMUMPSInt *nnz_mumps)
{
  PetscErrorCode ierr;
  PetscInt       nnz=ia[nrow]-1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */

  PetscFunctionBegin;
#if defined(PETSC_USE_64BIT_INDICES)
  {
    PetscInt i;
    /* 64-bit PetscInt: entries must be copied/cast into persistent MUMPS-int work buffers */
    if (nrow+1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
      ierr = PetscFree(mumps->ia_alloc);CHKERRQ(ierr);
      ierr = PetscMalloc1(nrow+1,&mumps->ia_alloc);CHKERRQ(ierr);
      mumps->cur_ilen = nrow+1;
    }
    if (nnz > mumps->cur_jlen) {
      ierr = PetscFree(mumps->ja_alloc);CHKERRQ(ierr);
      ierr = PetscMalloc1(nnz,&mumps->ja_alloc);CHKERRQ(ierr);
      mumps->cur_jlen = nnz;
    }
    for (i=0; i<nrow+1; i++) {ierr = PetscMUMPSIntCast(ia[i],&(mumps->ia_alloc[i]));CHKERRQ(ierr);}
    for (i=0; i<nnz; i++) {ierr = PetscMUMPSIntCast(ja[i],&(mumps->ja_alloc[i]));CHKERRQ(ierr);}
    *ia_mumps = mumps->ia_alloc;
    *ja_mumps = mumps->ja_alloc;
  }
#else
  /* 32-bit PetscInt: same width as PetscMUMPSInt, the input arrays can be reused directly */
  *ia_mumps = ia;
  *ja_mumps = ja;
#endif
  ierr = PetscMUMPSIntCast(nnz,nnz_mumps);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Free the Schur complement work arrays and reset the associated MUMPS controls */
static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr);
  ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
  ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
  mumps->id.size_schur = 0;
  mumps->id.schur_lld  = 0;
  mumps->id.ICNTL(19)  = 0;
  PetscFunctionReturn(0);
}

/* solve with rhs in mumps->id.redrhs and return in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS            *mumps=(Mat_MUMPS*)F->data;
  Mat                  S,B,X;
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;
  PetscErrorCode       ierr;

  PetscFunctionBegin;
  ierr = MatFactorFactorizeSchurComplement(F);CHKERRQ(ierr);
  ierr = MatFactorGetSchurComplement(F,&S,&schurstatus);CHKERRQ(ierr);
  /* B wraps the MUMPS reduced right-hand side storage; no copy is made */
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  ierr = MatBindToCPU(B,S->boundtocpu);CHKERRQ(ierr);
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    /* X aliases the same redrhs storage as B, so the triangular solve happens in place */
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);CHKERRQ(ierr);
    ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr);
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatMatSolveTranspose(S,B,X);CHKERRQ(ierr);
    } else {
      ierr = MatMatSolve(S,B,X);CHKERRQ(ierr);
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    /* the inverse is available, so the solve is a multiply; grow the scratch solution buffer if needed */
    sizesol = mumps->id.nrhs*mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
      ierr = PetscMalloc1(sizesol,&mumps->schur_sol);CHKERRQ(ierr);
      mumps->schur_sizesol = sizesol;
    }
    ierr =
    MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);CHKERRQ(ierr);
    ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr);
#endif
    /* X = inv(S)*B (or inv(S)^T*B for a transpose solve), computed as a mat-mat product */
    ierr = MatProductCreateWithMat(S,B,NULL,X);CHKERRQ(ierr);
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatProductSetType(X,MATPRODUCT_AtB);CHKERRQ(ierr);
    } else {
      ierr = MatProductSetType(X,MATPRODUCT_AB);CHKERRQ(ierr);
    }
    ierr = MatProductSetFromOptions(X);CHKERRQ(ierr);
    ierr = MatProductSymbolic(X);CHKERRQ(ierr);
    ierr = MatProductNumeric(X);CHKERRQ(ierr);

    /* copy the product back into the redrhs storage wrapped by B */
    ierr = MatCopy(X,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %D",F->schur_status);
  }
  ierr = MatFactorRestoreSchurComplement(F,&S,schurstatus);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = MatDestroy(&X);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Bracket a MUMPS solve around the Schur complement: condensation setup (expansion=PETSC_FALSE)
   or the user-side Schur solve plus expansion call (expansion=PETSC_TRUE) */
static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)F->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
    PetscFunctionReturn(0);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = mumps->id.size_schur;
      ierr = PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);CHKERRQ(ierr);
      mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs;
    }
    mumps->id.ICNTL(26) = 1; /* condensation phase */
  } else { /* prepare for the expansion step */
    /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
    ierr = MatMumpsSolveSchur_Private(F);CHKERRQ(ierr);
    mumps->id.ICNTL(26) = 2; /* expansion phase */
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
    if (mumps->id.nrhs > 1) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = 0;
      mumps->sizeredrhs = 0;
    }
  }
  PetscFunctionReturn(0);
}

/*
  MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz]

  input:
    A       - matrix in aij,baij or sbaij format
    shift   - 0: C style output triple; 1: Fortran style output triple.
    reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
              MAT_REUSE_MATRIX:   only the values in v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and col index, matrix values (matrix triples)

  The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
  freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means
  that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
 */

PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscScalar *av;
  const PetscInt    *ai,*aj,*ajj,M=A->rmap->n;
  PetscInt64        nz,rnz,i,j,k;
  PetscErrorCode    ierr;
  PetscMUMPSInt     *row,*col;
  Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;

  PetscFunctionBegin;
  ierr = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr);
  /* AIJ value order already matches the triples, so MUMPS can use the array directly (no copy) */
  mumps->val = (PetscScalar*)av;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz;
    ai   = aa->i;
    aj   = aa->j;
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    for (i=k=0; i<M; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        ierr = PetscMUMPSIntCast(i+shift,&row[k]);CHKERRQ(ierr);
        ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[k]);CHKERRQ(ierr);
        k++;
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  PetscErrorCode ierr;
  PetscInt64     nz,i,j,k,r;
  Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
  PetscMUMPSInt  *row,*col;

  PetscFunctionBegin;
  mumps->val = a->val;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = a->sliidx[a->totalslices];
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    /* SELL packs 8 rows per slice; (r+1)&0x07 cycles the row index within the slice */
    for (i=k=0; i<a->totalslices; i++) {
      for (j=a->sliidx[i],r=0; j<a->sliidx[i+1]; j++,r=((r+1)&0x07)) {
        ierr = PetscMUMPSIntCast(8*i+r+shift,&row[k++]);CHKERRQ(ierr);
      }
    }
    for (i=0;i<nz;i++) {ierr = PetscMUMPSIntCast(a->colidx[i]+shift,&col[i]);CHKERRQ(ierr);}
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  Mat_SeqBAIJ    *aa=(Mat_SeqBAIJ*)A->data;
  const PetscInt
*ai,*aj,*ajj,bs2 = aa->bs2; 418 PetscInt64 M,nz,idx=0,rnz,i,j,k,m; 419 PetscInt bs; 420 PetscErrorCode ierr; 421 PetscMUMPSInt *row,*col; 422 423 PetscFunctionBegin; 424 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 425 M = A->rmap->N/bs; 426 mumps->val = aa->a; 427 if (reuse == MAT_INITIAL_MATRIX) { 428 ai = aa->i; aj = aa->j; 429 nz = bs2*aa->nz; 430 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 431 for (i=0; i<M; i++) { 432 ajj = aj + ai[i]; 433 rnz = ai[i+1] - ai[i]; 434 for (k=0; k<rnz; k++) { 435 for (j=0; j<bs; j++) { 436 for (m=0; m<bs; m++) { 437 ierr = PetscMUMPSIntCast(i*bs + m + shift,&row[idx]);CHKERRQ(ierr); 438 ierr = PetscMUMPSIntCast(bs*ajj[k] + j + shift,&col[idx]);CHKERRQ(ierr); 439 idx++; 440 } 441 } 442 } 443 } 444 mumps->irn = row; 445 mumps->jcn = col; 446 mumps->nnz = nz; 447 } 448 PetscFunctionReturn(0); 449 } 450 451 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 452 { 453 const PetscInt *ai, *aj,*ajj; 454 PetscInt bs; 455 PetscInt64 nz,rnz,i,j,k,m; 456 PetscErrorCode ierr; 457 PetscMUMPSInt *row,*col; 458 PetscScalar *val; 459 Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)A->data; 460 const PetscInt bs2=aa->bs2,mbs=aa->mbs; 461 #if defined(PETSC_USE_COMPLEX) 462 PetscBool hermitian; 463 #endif 464 465 PetscFunctionBegin; 466 #if defined(PETSC_USE_COMPLEX) 467 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 468 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 469 #endif 470 ai = aa->i; 471 aj = aa->j; 472 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 473 if (reuse == MAT_INITIAL_MATRIX) { 474 nz = aa->nz; 475 ierr = PetscMalloc2(bs2*nz,&row,bs2*nz,&col);CHKERRQ(ierr); 476 if (bs>1) { 477 ierr = PetscMalloc1(bs2*nz,&mumps->val_alloc);CHKERRQ(ierr); 478 mumps->val = mumps->val_alloc; 479 } else { 480 mumps->val = aa->a; 481 } 482 mumps->irn = row; 483 mumps->jcn = col; 484 } else { 485 if 
(bs == 1) mumps->val = aa->a; 486 row = mumps->irn; 487 col = mumps->jcn; 488 } 489 val = mumps->val; 490 491 nz = 0; 492 if (bs>1) { 493 for (i=0; i<mbs; i++) { 494 rnz = ai[i+1] - ai[i]; 495 ajj = aj + ai[i]; 496 for (j=0; j<rnz; j++) { 497 for (k=0; k<bs; k++) { 498 for (m=0; m<bs; m++) { 499 if (ajj[j]>i || k>=m) { 500 if (reuse == MAT_INITIAL_MATRIX) { 501 ierr = PetscMUMPSIntCast(i*bs + m + shift,&row[nz]);CHKERRQ(ierr); 502 ierr = PetscMUMPSIntCast(ajj[j]*bs + k + shift,&col[nz]);CHKERRQ(ierr); 503 } 504 val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs]; 505 } 506 } 507 } 508 } 509 } 510 } else if (reuse == MAT_INITIAL_MATRIX) { 511 for (i=0; i<mbs; i++) { 512 rnz = ai[i+1] - ai[i]; 513 ajj = aj + ai[i]; 514 for (j=0; j<rnz; j++) { 515 ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr); 516 ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr); 517 nz++; 518 } 519 } 520 if (nz != aa->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %D != %D",nz,aa->nz); 521 } 522 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 523 PetscFunctionReturn(0); 524 } 525 526 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 527 { 528 const PetscInt *ai,*aj,*ajj,*adiag,M=A->rmap->n; 529 PetscInt64 nz,rnz,i,j; 530 const PetscScalar *av,*v1; 531 PetscScalar *val; 532 PetscErrorCode ierr; 533 PetscMUMPSInt *row,*col; 534 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 535 PetscBool missing; 536 #if defined(PETSC_USE_COMPLEX) 537 PetscBool hermitian; 538 #endif 539 540 PetscFunctionBegin; 541 #if defined(PETSC_USE_COMPLEX) 542 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 543 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 544 #endif 545 ierr = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr); 546 ai = aa->i; aj = aa->j; 547 adiag = aa->diag; 548 ierr = 
  MatMissingDiagonal_SeqAIJ(A,&missing,NULL);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    /* count nz in the upper triangular part of A */
    nz = 0;
    if (missing) {
      /* some diagonal entries are absent: adiag[i] may point past the row, so scan columns explicitly */
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            nz++;
          }
        } else {
          nz += ai[i+1] - adiag[i];
        }
      }
    } else {
      for (i=0; i<M; i++) nz += ai[i+1] - adiag[i];
    }
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;

    /* second pass: fill the (row,col,val) triples for the upper triangle */
    nz = 0;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
            ierr = PetscMUMPSIntCast(aj[j]+shift,&col[nz]);CHKERRQ(ierr);
            val[nz] = av[j];
            nz++;
          }
        } else {
          rnz = ai[i+1] - adiag[i];
          ajj = aj + adiag[i];
          v1  = av + adiag[i];
          for (j=0; j<rnz; j++) {
            ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
            ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i=0; i<M; i++) {
        rnz = ai[i+1] - adiag[i];
        ajj = aj + adiag[i];
        v1  = av + adiag[i];
        for (j=0; j<rnz; j++) {
          ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
          ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
          val[nz++] = v1[j];
        }
      }
    }
  } else {
    /* MAT_REUSE_MATRIX: the pattern is fixed, only refresh the numerical values */
    nz = 0;
    val = mumps->val;
    if (missing) {
      for (i=0; i <M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            val[nz++] = av[j];
          }
        } else {
          rnz = ai[i+1] - adiag[i];
          v1  = av + adiag[i];
          for (j=0; j<rnz; j++) {
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for
(i=0; i <M; i++) { 627 rnz = ai[i+1] - adiag[i]; 628 v1 = av + adiag[i]; 629 for (j=0; j<rnz; j++) { 630 val[nz++] = v1[j]; 631 } 632 } 633 } 634 } 635 ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr); 636 PetscFunctionReturn(0); 637 } 638 639 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 640 { 641 PetscErrorCode ierr; 642 const PetscInt *ai,*aj,*bi,*bj,*garray,*ajj,*bjj; 643 PetscInt bs; 644 PetscInt64 rstart,nz,i,j,k,m,jj,irow,countA,countB; 645 PetscMUMPSInt *row,*col; 646 const PetscScalar *av,*bv,*v1,*v2; 647 PetscScalar *val; 648 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data; 649 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)(mat->A)->data; 650 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 651 const PetscInt bs2=aa->bs2,mbs=aa->mbs; 652 #if defined(PETSC_USE_COMPLEX) 653 PetscBool hermitian; 654 #endif 655 656 PetscFunctionBegin; 657 #if defined(PETSC_USE_COMPLEX) 658 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 659 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 660 #endif 661 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 662 rstart = A->rmap->rstart; 663 ai = aa->i; 664 aj = aa->j; 665 bi = bb->i; 666 bj = bb->j; 667 av = aa->a; 668 bv = bb->a; 669 670 garray = mat->garray; 671 672 if (reuse == MAT_INITIAL_MATRIX) { 673 nz = (aa->nz+bb->nz)*bs2; /* just a conservative estimate */ 674 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 675 ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr); 676 /* can not decide the exact mumps->nnz now because of the SBAIJ */ 677 mumps->irn = row; 678 mumps->jcn = col; 679 mumps->val = mumps->val_alloc = val; 680 } else { 681 val = mumps->val; 682 } 683 684 jj = 0; irow = rstart; 685 for (i=0; i<mbs; i++) { 686 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 687 countA = ai[i+1] - ai[i]; 688 countB = bi[i+1] - bi[i]; 689 bjj = bj + bi[i]; 690 v1 = av + ai[i]*bs2; 
    v2     = bv + bi[i]*bs2;

    if (bs>1) {
      /* A-part: diagonal-block entries; keep full off-diagonal blocks, upper part of diagonal blocks */
      for (j=0; j<countA; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            if (rstart + ajj[j]*bs>irow || k>=m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                ierr = PetscMUMPSIntCast(irow + m + shift,&row[jj]);CHKERRQ(ierr);
                ierr = PetscMUMPSIntCast(rstart + ajj[j]*bs + k + shift,&col[jj]);CHKERRQ(ierr);
              }
              val[jj++] = v1[j*bs2 + m + k*bs];
            }
          }
        }
      }

      /* B-part: off-diagonal block, garray maps local to global block column */
      for (j=0; j < countB; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            if (reuse == MAT_INITIAL_MATRIX) {
              ierr = PetscMUMPSIntCast(irow + m + shift,&row[jj]);CHKERRQ(ierr);
              ierr = PetscMUMPSIntCast(garray[bjj[j]]*bs + k + shift,&col[jj]);CHKERRQ(ierr);
            }
            val[jj++] = v2[j*bs2 + m + k*bs];
          }
        }
      }
    } else {
      /* A-part */
      for (j=0; j<countA; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
          ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr);
        }
        val[jj++] = v1[j];
      }

      /* B-part */
      for (j=0; j < countB; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
          ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr);
        }
        val[jj++] = v2[j];
      }
    }
    irow+=bs;
  }
  /* the exact count is only known after the upper-triangle filtering above */
  mumps->nnz = jj;
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt64        rstart,nz,i,j,jj,irow,countA,countB;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av, *bv,*v1,*v2;
  PetscScalar       *val;
  Mat               Ad,Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;

  PetscFunctionBegin;
  ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
  ierr =
  MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr);

  aa = (Mat_SeqAIJ*)(Ad)->data;
  bb = (Mat_SeqAIJ*)(Ao)->data;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part: diagonal block, local column indices are shifted by rstart to become global */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
        ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr);
      }
      val[jj++] = v1[j];
    }

    /* B-part: off-diagonal block, garray maps local column indices to global */
    for (j=0; j < countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
        ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr);
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ*)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
  const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj;
  const PetscInt    *garray =
  mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart;
  const PetscInt    bs2=mat->bs2;
  PetscErrorCode    ierr;
  PetscInt          bs;
  PetscInt64        nz,i,j,k,n,jj,irow,countA,countB,idx;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2;
  PetscScalar       *val;

  PetscFunctionBegin;
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    /* each block contributes bs2 scalar entries, the count is exact for BAIJ */
    nz   = bs2*(aa->nz + bb->nz);
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<mbs; i++) {
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    ajj    = aj + ai[i];
    bjj    = bj + bi[i];
    v1     = av + bs2*ai[i];
    v2     = bv + bs2*bi[i];

    idx = 0;
    /* A-part: expand each diagonal-block bs x bs block into scalar triples */
    for (k=0; k<countA; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            ierr = PetscMUMPSIntCast(irow + n + shift,&row[jj]);CHKERRQ(ierr);
            ierr = PetscMUMPSIntCast(rstart + bs*ajj[k] + j + shift,&col[jj]);CHKERRQ(ierr);
          }
          val[jj++] = v1[idx++];
        }
      }
    }

    idx = 0;
    /* B-part: same expansion; garray maps local block columns to global */
    for (k=0; k<countB; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            ierr = PetscMUMPSIntCast(irow + n + shift,&row[jj]);CHKERRQ(ierr);
            ierr = PetscMUMPSIntCast(bs*garray[bjj[k]] + j + shift,&col[jj]);CHKERRQ(ierr);
          }
          val[jj++] = v2[idx++];
        }
      }
    }
    irow += bs;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt64        rstart,nz,nza,nzb,i,j,jj,irow,countA,countB;
  PetscMUMPSInt     *row,*col;
  const
PetscScalar *av, *bv,*v1,*v2; 894 PetscScalar *val; 895 Mat Ad,Ao; 896 Mat_SeqAIJ *aa; 897 Mat_SeqAIJ *bb; 898 #if defined(PETSC_USE_COMPLEX) 899 PetscBool hermitian; 900 #endif 901 902 PetscFunctionBegin; 903 #if defined(PETSC_USE_COMPLEX) 904 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 905 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 906 #endif 907 ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr); 908 ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr); 909 ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr); 910 911 aa = (Mat_SeqAIJ*)(Ad)->data; 912 bb = (Mat_SeqAIJ*)(Ao)->data; 913 ai = aa->i; 914 aj = aa->j; 915 adiag = aa->diag; 916 bi = bb->i; 917 bj = bb->j; 918 919 rstart = A->rmap->rstart; 920 921 if (reuse == MAT_INITIAL_MATRIX) { 922 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 923 nzb = 0; /* num of upper triangular entries in mat->B */ 924 for (i=0; i<m; i++) { 925 nza += (ai[i+1] - adiag[i]); 926 countB = bi[i+1] - bi[i]; 927 bjj = bj + bi[i]; 928 for (j=0; j<countB; j++) { 929 if (garray[bjj[j]] > rstart) nzb++; 930 } 931 } 932 933 nz = nza + nzb; /* total nz of upper triangular part of mat */ 934 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 935 ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr); 936 mumps->nnz = nz; 937 mumps->irn = row; 938 mumps->jcn = col; 939 mumps->val = mumps->val_alloc = val; 940 } else { 941 val = mumps->val; 942 } 943 944 jj = 0; irow = rstart; 945 for (i=0; i<m; i++) { 946 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 947 v1 = av + adiag[i]; 948 countA = ai[i+1] - adiag[i]; 949 countB = bi[i+1] - bi[i]; 950 bjj = bj + bi[i]; 951 v2 = bv + bi[i]; 952 953 /* A-part */ 954 for (j=0; j<countA; j++) { 955 if (reuse == MAT_INITIAL_MATRIX) { 956 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 957 ierr = PetscMUMPSIntCast(rstart + 
ajj[j] + shift,&col[jj]);CHKERRQ(ierr); 958 } 959 val[jj++] = v1[j]; 960 } 961 962 /* B-part */ 963 for (j=0; j < countB; j++) { 964 if (garray[bjj[j]] > rstart) { 965 if (reuse == MAT_INITIAL_MATRIX) { 966 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 967 ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr); 968 } 969 val[jj++] = v2[j]; 970 } 971 } 972 irow++; 973 } 974 ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr); 975 ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr); 976 PetscFunctionReturn(0); 977 } 978 979 PetscErrorCode MatDestroy_MUMPS(Mat A) 980 { 981 PetscErrorCode ierr; 982 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 983 984 PetscFunctionBegin; 985 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 986 ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr); 987 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 988 ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr); 989 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 990 ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr); 991 ierr = PetscFree2(mumps->irn,mumps->jcn);CHKERRQ(ierr); 992 ierr = PetscFree(mumps->val_alloc);CHKERRQ(ierr); 993 ierr = PetscFree(mumps->info);CHKERRQ(ierr); 994 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 995 mumps->id.job = JOB_END; 996 PetscMUMPS_c(mumps); 997 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1)); 998 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 999 if (mumps->use_petsc_omp_support) { 1000 ierr = PetscOmpCtrlDestroy(&mumps->omp_ctrl);CHKERRQ(ierr); 1001 ierr = PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);CHKERRQ(ierr); 1002 ierr = PetscFree3(mumps->rhs_nrow,mumps->rhs_recvcounts,mumps->rhs_disps);CHKERRQ(ierr); 1003 } 1004 #endif 1005 ierr = PetscFree(mumps->ia_alloc);CHKERRQ(ierr); 1006 ierr = PetscFree(mumps->ja_alloc);CHKERRQ(ierr); 1007 ierr = PetscFree(mumps->recvcount);CHKERRQ(ierr); 1008 
ierr = PetscFree(mumps->reqs);CHKERRQ(ierr); 1009 ierr = PetscFree(mumps->irhs_loc);CHKERRQ(ierr); 1010 if (mumps->mumps_comm != MPI_COMM_NULL) {ierr = MPI_Comm_free(&mumps->mumps_comm);CHKERRMPI(ierr);} 1011 ierr = PetscFree(A->data);CHKERRQ(ierr); 1012 1013 /* clear composed functions */ 1014 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr); 1015 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr); 1016 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr); 1017 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr); 1018 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);CHKERRQ(ierr); 1019 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr); 1020 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);CHKERRQ(ierr); 1021 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);CHKERRQ(ierr); 1022 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);CHKERRQ(ierr); 1023 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);CHKERRQ(ierr); 1024 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);CHKERRQ(ierr); 1025 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);CHKERRQ(ierr); 1026 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);CHKERRQ(ierr); 1027 PetscFunctionReturn(0); 1028 } 1029 1030 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. 
*/
static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A,PetscInt nrhs,const PetscScalar *array)
{
  PetscErrorCode    ierr;
  Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
  const PetscMPIInt ompsize=mumps->omp_comm_size;
  PetscInt          i,m,M,rstart;

  PetscFunctionBegin;
  ierr = MatGetSize(A,&M,NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
  if (M > PETSC_MUMPS_INT_MAX) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
  if (ompsize == 1) {
    /* No PETSc-OpenMP aggregation: every rank passes its own rows directly to MUMPS */
    if (!mumps->irhs_loc) {
      mumps->nloc_rhs = m;
      ierr = PetscMalloc1(m,&mumps->irhs_loc);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
      for (i=0; i<m; i++) mumps->irhs_loc[i] = rstart+i+1; /* use 1-based indices */
    }
    mumps->id.rhs_loc = (MumpsScalar*)array;
  } else {
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
    /* Gather the RHS pieces of all ranks in this omp_comm onto the omp master, which is the
       only rank that talks to MUMPS */
    const PetscInt *ranges;
    PetscMPIInt    j,k,sendcount,*petsc_ranks,*omp_ranks;
    MPI_Group      petsc_group,omp_group;
    PetscScalar    *recvbuf=NULL;

    if (mumps->is_omp_master) {
      /* Lazily initialize the omp stuff for distributed rhs */
      if (!mumps->irhs_loc) {
        ierr = PetscMalloc2(ompsize,&omp_ranks,ompsize,&petsc_ranks);CHKERRQ(ierr);
        ierr = PetscMalloc3(ompsize,&mumps->rhs_nrow,ompsize,&mumps->rhs_recvcounts,ompsize,&mumps->rhs_disps);CHKERRQ(ierr);
        ierr = MPI_Comm_group(mumps->petsc_comm,&petsc_group);CHKERRMPI(ierr);
        ierr = MPI_Comm_group(mumps->omp_comm,&omp_group);CHKERRMPI(ierr);
        for (j=0; j<ompsize; j++) omp_ranks[j] = j;
        /* Map omp_comm ranks to petsc_comm ranks so we can look up their ownership ranges */
        ierr = MPI_Group_translate_ranks(omp_group,ompsize,omp_ranks,petsc_group,petsc_ranks);CHKERRMPI(ierr);

        /* Populate mumps->irhs_loc[], rhs_nrow[] */
        mumps->nloc_rhs = 0;
        ierr = MatGetOwnershipRanges(A,&ranges);CHKERRQ(ierr);
        for (j=0; j<ompsize; j++) {
          mumps->rhs_nrow[j] = ranges[petsc_ranks[j]+1] - ranges[petsc_ranks[j]];
          mumps->nloc_rhs   += mumps->rhs_nrow[j];
        }
        ierr = PetscMalloc1(mumps->nloc_rhs,&mumps->irhs_loc);CHKERRQ(ierr);
        for (j=k=0; j<ompsize; j++) {
          for (i=ranges[petsc_ranks[j]]; i<ranges[petsc_ranks[j]+1]; i++,k++) mumps->irhs_loc[k] = i+1; /* uses 1-based indices */
        }

        ierr = PetscFree2(omp_ranks,petsc_ranks);CHKERRQ(ierr);
        ierr = MPI_Group_free(&petsc_group);CHKERRMPI(ierr);
        ierr = MPI_Group_free(&omp_group);CHKERRMPI(ierr);
      }

      /* Realloc buffers when current nrhs is bigger than what we have met */
      if (nrhs > mumps->max_nrhs) {
        ierr = PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);CHKERRQ(ierr);
        ierr = PetscMalloc2(mumps->nloc_rhs*nrhs,&mumps->rhs_loc,mumps->nloc_rhs*nrhs,&mumps->rhs_recvbuf);CHKERRQ(ierr);
        mumps->max_nrhs = nrhs;
      }

      /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
      for (j=0; j<ompsize; j++) {ierr = PetscMPIIntCast(mumps->rhs_nrow[j]*nrhs,&mumps->rhs_recvcounts[j]);CHKERRQ(ierr);}
      mumps->rhs_disps[0] = 0;
      for (j=1; j<ompsize; j++) {
        mumps->rhs_disps[j] = mumps->rhs_disps[j-1] + mumps->rhs_recvcounts[j-1];
        if (mumps->rhs_disps[j] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscMPIInt overflow!");
      }
      recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
    }

    ierr = PetscMPIIntCast(m*nrhs,&sendcount);CHKERRQ(ierr);
    ierr = MPI_Gatherv(array,sendcount,MPIU_SCALAR,recvbuf,mumps->rhs_recvcounts,mumps->rhs_disps,MPIU_SCALAR,0,mumps->omp_comm);CHKERRMPI(ierr);

    if (mumps->is_omp_master) {
      if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
        PetscScalar *dst,*dstbase = mumps->rhs_loc;
        for (j=0; j<ompsize; j++) {
          const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
          dst = dstbase;
          for (i=0; i<nrhs; i++) {
            ierr = PetscArraycpy(dst,src,mumps->rhs_nrow[j]);CHKERRQ(ierr);
            src += mumps->rhs_nrow[j];
            dst += mumps->nloc_rhs;
          }
          dstbase += mumps->rhs_nrow[j];
        }
      }
      mumps->id.rhs_loc = (MumpsScalar*)mumps->rhs_loc;
    }
#endif /* PETSC_HAVE_OPENMP_SUPPORT */
  }
  mumps->id.nrhs     = nrhs;
  mumps->id.nloc_rhs = mumps->nloc_rhs;
  mumps->id.lrhs_loc = mumps->nloc_rhs;
  mumps->id.irhs_loc = mumps->irhs_loc;
  PetscFunctionReturn(0);
}

/*
  MatSolve_MUMPS - solve A x = b with the computed MUMPS factors (JOB_SOLVE),
  handling centralized vs distributed RHS and the optional Schur condensation/expansion steps.
*/
PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
  const PetscScalar *rarray = NULL;
  PetscScalar       *array;
  IS                is_iden,is_petsc;
  PetscErrorCode    ierr;
  PetscInt          i;
  PetscBool         second_solve = PETSC_FALSE;
  static PetscBool  cite1 = PETSC_FALSE,cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1);CHKERRQ(ierr);
  ierr = PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. 
L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",&cite2);CHKERRQ(ierr);

  if (A->factorerrortype) {
    /* A previous factorization failed; mark the solution as invalid instead of calling MUMPS */
    ierr = PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    ierr = VecSetInf(x);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  mumps->id.nrhs = 1;
  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      ierr = VecGetArrayRead(b,&rarray);CHKERRQ(ierr);
      ierr = MatMumpsSetUpDistRHSInfo(A,1,rarray);CHKERRQ(ierr);
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS; scatter b into a sequential rhs vector */
      ierr = VecScatterBegin(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      if (!mumps->myid) {
        ierr = VecGetArray(mumps->b_seq,&array);CHKERRQ(ierr);
        mumps->id.rhs = (MumpsScalar*)array;
      }
    }
  } else {  /* petsc_size == 1 */
    mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
    /* MUMPS overwrites the rhs with the solution, so copy b into x and hand x's array to MUMPS */
    ierr = VecCopy(b,x);CHKERRQ(ierr);
    ierr = VecGetArray(x,&array);CHKERRQ(ierr);
    mumps->id.rhs = (MumpsScalar*)array;
  }

  /*
     handle condensation step of Schur complement (if any)
     We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
     According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase
     Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
     This requires an extra call to PetscMUMPS_c and the computation of the factors for S
  */
  if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
    if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n");
    second_solve = PETSC_TRUE;
    ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
  }
  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

  /* handle expansion step of Schur complement (if any) */
  if (second_solve) {
    ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
  }

  if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
    if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
      /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */
      ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
    }
    if (!mumps->scat_sol) { /* create scatter scat_sol */
      PetscInt *isol2_loc=NULL;
      ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */
      ierr = PetscMalloc1(mumps->id.lsol_loc,&isol2_loc);CHKERRQ(ierr);
      for (i=0; i<mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i]-1; /* change Fortran style to C style */
      ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,isol2_loc,PETSC_OWN_POINTER,&is_petsc);CHKERRQ(ierr); /* to */
      ierr = VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr);
      ierr = ISDestroy(&is_iden);CHKERRQ(ierr);
      ierr = ISDestroy(&is_petsc);CHKERRQ(ierr);
      mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
    }

    ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }

  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      ierr = VecRestoreArrayRead(b,&rarray);CHKERRQ(ierr);
    } else if (!mumps->myid) {
      ierr = VecRestoreArray(mumps->b_seq,&array);CHKERRQ(ierr);
    }
  } else {ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);}

  ierr = PetscLogFlops(2.0*mumps->id.RINFO(3));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  MatSolveTranspose_MUMPS - solve A^T x = b by toggling MUMPS ICNTL(9)
  (1 = solve A x = b, any other value = solve A^T x = b) around a regular MatSolve.
*/
PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mumps->id.ICNTL(9) = 0;
  ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr);
  mumps->id.ICNTL(9) = 1;
  PetscFunctionReturn(0);
}

/*
  MatMatSolve_MUMPS - solve A X = B for multiple right-hand sides. X must be dense;
  B is either dense, or a MATTRANSPOSEMAT wrapping an AIJ matrix (sparse RHS path,
  since MUMPS wants sparse RHS in compressed-column form).
*/
PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X)
{
  PetscErrorCode    ierr;
  Mat               Bt = NULL;
  PetscBool         denseX,denseB,flg,flgT;
  Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
  PetscInt          i,nrhs,M;
  PetscScalar       *array;
  const PetscScalar *rbray;
  PetscInt          lsol_loc,nlsol_loc,*idxx,iidx = 0;
  PetscMUMPSInt     *isol_loc,*isol_loc_save;
  PetscScalar       *bray,*sol_loc,*sol_loc_save;
  IS                is_to,is_from;
  PetscInt          k,proc,j,m,myrstart;
  const PetscInt    *rstart;
  Vec               v_mpi,msol_loc;
  VecScatter        scat_sol;
  Vec               b_seq;
  VecScatter        scat_rhs;
  PetscScalar       *aa;
  PetscInt          spnr,*ia,*ja;
  Mat_MPIAIJ        *b = NULL;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompareAny((PetscObject)X,&denseX,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (!denseX) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");

  ierr = PetscObjectTypeCompareAny((PetscObject)B,&denseB,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (denseB) {
    if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution");
    mumps->id.ICNTL(20)= 0; /* dense RHS */
  } else { /* sparse B */
    if (X == B) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_IDN,"X and B must be different matrices");
    ierr = PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);CHKERRQ(ierr);
    if (flgT) { /* input B is transpose of actual RHS matrix,
                   because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
      ierr = MatTransposeGetMat(B,&Bt);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix");
    mumps->id.ICNTL(20)= 1; /* sparse RHS */
  }

  ierr = MatGetSize(B,&M,&nrhs);CHKERRQ(ierr);
  mumps->id.nrhs = nrhs;
  mumps->id.lrhs = M;
  mumps->id.rhs  = NULL;

  if (mumps->petsc_size == 1) {
    /* Sequential path: MUMPS solves in place in X's array */
    PetscScalar *aa;
    PetscInt    spnr,*ia,*ja;
    PetscBool   second_solve = PETSC_FALSE;

    ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
    mumps->id.rhs = (MumpsScalar*)array;

    if (denseB) {
      /* copy B to X */
      ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr);
      ierr = PetscArraycpy(array,rbray,M*nrhs);CHKERRQ(ierr);
      ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr);
    } else { /* sparse B */
      ierr = MatSeqAIJGetArray(Bt,&aa);CHKERRQ(ierr);
      ierr = MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr);
      mumps->id.rhs_sparse  = (MumpsScalar*)aa;
    }
    /* handle condensation step of Schur complement (if any) */
    if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
      second_solve = PETSC_TRUE;
      ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
    }
    /* solve phase */
    /*-------------*/
    mumps->id.job = JOB_SOLVE;
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

    /* handle expansion step of Schur complement (if any) */
    if (second_solve) {
      ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
    }
    if (!denseB) { /* sparse B */
      ierr = MatSeqAIJRestoreArray(Bt,&aa);CHKERRQ(ierr);
      ierr = MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
    ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/
  if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n");

  /* create msol_loc to hold mumps local solution */
  isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
  sol_loc_save  = (PetscScalar*)mumps->id.sol_loc;

  lsol_loc  = mumps->id.lsol_loc;
  nlsol_loc = nrhs*lsol_loc;     /* length of sol_loc */
  ierr = PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);CHKERRQ(ierr);
  mumps->id.sol_loc  = (MumpsScalar*)sol_loc;
  mumps->id.isol_loc = isol_loc;

  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);CHKERRQ(ierr);

  if (denseB) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr);
      ierr = MatMumpsSetUpDistRHSInfo(A,nrhs,rbray);CHKERRQ(ierr);
      ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr);
      ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,NULL,&v_mpi);CHKERRQ(ierr);
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
      /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in
         very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank
         0, re-arrange B into desired order, which is a local operation.
       */

      /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
      /* wrap dense rhs matrix B into a vector v_mpi */
      ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr);
      ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
      ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr);

      /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
      if (!mumps->myid) {
        PetscInt *idx;
        /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
        ierr = PetscMalloc1(nrhs*M,&idx);CHKERRQ(ierr);
        ierr = MatGetOwnershipRanges(B,&rstart);CHKERRQ(ierr);
        k = 0;
        for (proc=0; proc<mumps->petsc_size; proc++){
          for (j=0; j<nrhs; j++){
            for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i;
          }
        }

        ierr = VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);CHKERRQ(ierr);
        ierr = ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr);
        ierr = ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);CHKERRQ(ierr);
      } else {
        ierr = VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);CHKERRQ(ierr);
        ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);CHKERRQ(ierr);
        ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);CHKERRQ(ierr);
      }
      ierr = VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);CHKERRQ(ierr);
      ierr = VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = ISDestroy(&is_to);CHKERRQ(ierr);
      ierr = ISDestroy(&is_from);CHKERRQ(ierr);
      ierr = VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      if (!mumps->myid) { /* define rhs on the host */
        ierr = VecGetArray(b_seq,&bray);CHKERRQ(ierr);
        mumps->id.rhs = (MumpsScalar*)bray;
        ierr = VecRestoreArray(b_seq,&bray);CHKERRQ(ierr);
      }
    }
  } else { /* sparse B */
    b = (Mat_MPIAIJ*)Bt->data;

    /* wrap dense X into a vector v_mpi */
    ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr);
    ierr = MatDenseGetArray(X,&bray);CHKERRQ(ierr);
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(X,&bray);CHKERRQ(ierr);

    if (!mumps->myid) {
      ierr = MatSeqAIJGetArray(b->A,&aa);CHKERRQ(ierr);
      ierr = MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr);
      mumps->id.rhs_sparse  = (MumpsScalar*)aa;
    } else {
      mumps->id.irhs_ptr    = NULL;
      mumps->id.irhs_sparse = NULL;
      mumps->id.nz_rhs      = 0;
      mumps->id.rhs_sparse  = NULL;
    }
  }

  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

  /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
  ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
  ierr = VecPlaceArray(v_mpi,array);CHKERRQ(ierr);

  /* create scatter scat_sol */
  ierr = MatGetOwnershipRanges(X,&rstart);CHKERRQ(ierr);
  /* iidx: index for scatter mumps solution to petsc X */

  ierr = ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);CHKERRQ(ierr);
  ierr = PetscMalloc1(nlsol_loc,&idxx);CHKERRQ(ierr);
  for (i=0; i<lsol_loc; i++) {
    isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */

    for (proc=0; proc<mumps->petsc_size; proc++){
      if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) {
        myrstart = rstart[proc];
        k        = isol_loc[i] - myrstart;        /* local index on 1st column of petsc vector X */
        iidx     = k + myrstart*nrhs;             /* maps mumps isol_loc[i] to petsc index in X */
        m        = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */
        break;
      }
    }

    for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m;
  }
  ierr = ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr);
  ierr = VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);CHKERRQ(ierr);
  ierr = VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = ISDestroy(&is_from);CHKERRQ(ierr);
  ierr = ISDestroy(&is_to);CHKERRQ(ierr);
  ierr = VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);

  /* free spaces */
  mumps->id.sol_loc  = (MumpsScalar*)sol_loc_save;
  mumps->id.isol_loc = isol_loc_save;

  ierr = PetscFree2(sol_loc,isol_loc);CHKERRQ(ierr);
  ierr = PetscFree(idxx);CHKERRQ(ierr);
  ierr = VecDestroy(&msol_loc);CHKERRQ(ierr);
  ierr = VecDestroy(&v_mpi);CHKERRQ(ierr);
  if (!denseB) {
    if (!mumps->myid) {
      b = (Mat_MPIAIJ*)Bt->data;
      ierr = MatSeqAIJRestoreArray(b->A,&aa);CHKERRQ(ierr);
      ierr = MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
  } else {
    if (mumps->ICNTL20 == 0) {
      ierr = VecDestroy(&b_seq);CHKERRQ(ierr);
      ierr = VecScatterDestroy(&scat_rhs);CHKERRQ(ierr);
    }
  }
  ierr = VecScatterDestroy(&scat_sol);CHKERRQ(ierr);
  ierr = PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  MatMatTransposeSolve_MUMPS - solve A X = Bt^T for a sparse AIJ matrix Bt by wrapping
  it in a MATTRANSPOSEMAT (so MUMPS sees compressed-column storage) and calling MatMatSolve_MUMPS.
*/
PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X)
{
  PetscErrorCode ierr;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix");

  /* Create B=Bt^T that uses Bt's data structure */
  ierr = MatCreateTranspose(Bt,&B);CHKERRQ(ierr);

  ierr = MatMatSolve_MUMPS(A,B,X);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if !defined(PETSC_USE_COMPLEX)
/*
  input:
   F:        numeric factor
  output:
   nneg:     total number of negative pivots
   nzero:    total number of zero pivots
   npos:     (global dimension of F) - nneg - nzero
*/
PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos)
{
  Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRMPI(ierr);
  /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
  if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. 
-mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",mumps->id.INFOG(13)); 1525 1526 if (nneg) *nneg = mumps->id.INFOG(12); 1527 if (nzero || npos) { 1528 if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1529 if (nzero) *nzero = mumps->id.INFOG(28); 1530 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1531 } 1532 PetscFunctionReturn(0); 1533 } 1534 #endif 1535 1536 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps) 1537 { 1538 PetscErrorCode ierr; 1539 PetscInt i,nreqs; 1540 PetscMUMPSInt *irn,*jcn; 1541 PetscMPIInt count; 1542 PetscInt64 totnnz,remain; 1543 const PetscInt osize=mumps->omp_comm_size; 1544 PetscScalar *val; 1545 1546 PetscFunctionBegin; 1547 if (osize > 1) { 1548 if (reuse == MAT_INITIAL_MATRIX) { 1549 /* master first gathers counts of nonzeros to receive */ 1550 if (mumps->is_omp_master) {ierr = PetscMalloc1(osize,&mumps->recvcount);CHKERRQ(ierr);} 1551 ierr = MPI_Gather(&mumps->nnz,1,MPIU_INT64,mumps->recvcount,1,MPIU_INT64,0/*master*/,mumps->omp_comm);CHKERRMPI(ierr); 1552 1553 /* Then each computes number of send/recvs */ 1554 if (mumps->is_omp_master) { 1555 /* Start from 1 since self communication is not done in MPI */ 1556 nreqs = 0; 1557 for (i=1; i<osize; i++) nreqs += (mumps->recvcount[i]+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX; 1558 } else { 1559 nreqs = (mumps->nnz+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX; 1560 } 1561 ierr = PetscMalloc1(nreqs*3,&mumps->reqs);CHKERRQ(ierr); /* Triple the requests since we send irn, jcn and val seperately */ 1562 1563 /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 1564 MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 1565 might be a prime number > 2^31-1, we have to slice the message. 
Note omp_comm_size 1566 is very small, the current approach should have no extra overhead compared to MPI_Gatherv. 1567 */ 1568 nreqs = 0; /* counter for actual send/recvs */ 1569 if (mumps->is_omp_master) { 1570 for (i=0,totnnz=0; i<osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1571 ierr = PetscMalloc2(totnnz,&irn,totnnz,&jcn);CHKERRQ(ierr); 1572 ierr = PetscMalloc1(totnnz,&val);CHKERRQ(ierr); 1573 1574 /* Self communication */ 1575 ierr = PetscArraycpy(irn,mumps->irn,mumps->nnz);CHKERRQ(ierr); 1576 ierr = PetscArraycpy(jcn,mumps->jcn,mumps->nnz);CHKERRQ(ierr); 1577 ierr = PetscArraycpy(val,mumps->val,mumps->nnz);CHKERRQ(ierr); 1578 1579 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1580 ierr = PetscFree2(mumps->irn,mumps->jcn);CHKERRQ(ierr); 1581 ierr = PetscFree(mumps->val_alloc);CHKERRQ(ierr); 1582 mumps->nnz = totnnz; 1583 mumps->irn = irn; 1584 mumps->jcn = jcn; 1585 mumps->val = mumps->val_alloc = val; 1586 1587 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1588 jcn += mumps->recvcount[0]; 1589 val += mumps->recvcount[0]; 1590 1591 /* Remote communication */ 1592 for (i=1; i<osize; i++) { 1593 count = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX); 1594 remain = mumps->recvcount[i] - count; 1595 while (count>0) { 1596 ierr = MPI_Irecv(irn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr); 1597 ierr = MPI_Irecv(jcn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr); 1598 ierr = MPI_Irecv(val,count,MPIU_SCALAR, i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr); 1599 irn += count; 1600 jcn += count; 1601 val += count; 1602 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1603 remain -= count; 1604 } 1605 } 1606 } else { 1607 irn = mumps->irn; 1608 jcn = mumps->jcn; 1609 val = mumps->val; 1610 count = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX); 1611 remain = mumps->nnz - 
count; 1612 while (count>0) { 1613 ierr = MPI_Isend(irn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr); 1614 ierr = MPI_Isend(jcn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr); 1615 ierr = MPI_Isend(val,count,MPIU_SCALAR, 0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr); 1616 irn += count; 1617 jcn += count; 1618 val += count; 1619 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1620 remain -= count; 1621 } 1622 } 1623 } else { 1624 nreqs = 0; 1625 if (mumps->is_omp_master) { 1626 val = mumps->val + mumps->recvcount[0]; 1627 for (i=1; i<osize; i++) { /* Remote communication only since self data is already in place */ 1628 count = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX); 1629 remain = mumps->recvcount[i] - count; 1630 while (count>0) { 1631 ierr = MPI_Irecv(val,count,MPIU_SCALAR,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr); 1632 val += count; 1633 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1634 remain -= count; 1635 } 1636 } 1637 } else { 1638 val = mumps->val; 1639 count = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX); 1640 remain = mumps->nnz - count; 1641 while (count>0) { 1642 ierr = MPI_Isend(val,count,MPIU_SCALAR,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr); 1643 val += count; 1644 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1645 remain -= count; 1646 } 1647 } 1648 } 1649 ierr = MPI_Waitall(nreqs,mumps->reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 1650 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1651 } 1652 PetscFunctionReturn(0); 1653 } 1654 1655 PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info) 1656 { 1657 Mat_MUMPS *mumps =(Mat_MUMPS*)(F)->data; 1658 PetscErrorCode ierr; 1659 PetscBool isMPIAIJ; 1660 1661 PetscFunctionBegin; 1662 if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) { 1663 if (mumps->id.INFOG(1) == -6) { 
1664 ierr = PetscInfo2(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1665 } 1666 ierr = PetscInfo2(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1667 PetscFunctionReturn(0); 1668 } 1669 1670 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps);CHKERRQ(ierr); 1671 ierr = MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);CHKERRQ(ierr); 1672 1673 /* numerical factorization phase */ 1674 /*-------------------------------*/ 1675 mumps->id.job = JOB_FACTNUMERIC; 1676 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1677 if (!mumps->myid) { 1678 mumps->id.a = (MumpsScalar*)mumps->val; 1679 } 1680 } else { 1681 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1682 } 1683 PetscMUMPS_c(mumps); 1684 if (mumps->id.INFOG(1) < 0) { 1685 if (A->erroriffailure) { 1686 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 1687 } else { 1688 if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */ 1689 ierr = PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1690 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1691 } else if (mumps->id.INFOG(1) == -13) { 1692 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1693 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1694 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 1695 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray \n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1696 
F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1697 } else { 1698 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1699 F->factorerrortype = MAT_FACTOR_OTHER; 1700 } 1701 } 1702 } 1703 if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d\n",mumps->id.INFOG(16)); 1704 1705 F->assembled = PETSC_TRUE; 1706 mumps->matstruc = SAME_NONZERO_PATTERN; 1707 if (F->schur) { /* reset Schur status to unfactored */ 1708 #if defined(PETSC_HAVE_CUDA) 1709 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 1710 #endif 1711 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1712 mumps->id.ICNTL(19) = 2; 1713 ierr = MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);CHKERRQ(ierr); 1714 } 1715 ierr = MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);CHKERRQ(ierr); 1716 } 1717 1718 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 1719 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 1720 1721 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 1722 if (mumps->petsc_size > 1) { 1723 PetscInt lsol_loc; 1724 PetscScalar *sol_loc; 1725 1726 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr); 1727 1728 /* distributed solution; Create x_seq=sol_loc for repeated use */ 1729 if (mumps->x_seq) { 1730 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 1731 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 1732 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 1733 } 1734 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1735 ierr = PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);CHKERRQ(ierr); 1736 mumps->id.lsol_loc = lsol_loc; 1737 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1738 ierr = 
VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr); 1739 } 1740 ierr = PetscLogFlops(mumps->id.RINFO(2));CHKERRQ(ierr); 1741 PetscFunctionReturn(0); 1742 } 1743 1744 /* Sets MUMPS options from the options database */ 1745 PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A) 1746 { 1747 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1748 PetscErrorCode ierr; 1749 PetscMUMPSInt icntl=0; 1750 PetscInt info[80],i,ninfo=80; 1751 PetscBool flg=PETSC_FALSE; 1752 1753 PetscFunctionBegin; 1754 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 1755 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr); 1756 if (flg) mumps->id.ICNTL(1) = icntl; 1757 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr); 1758 if (flg) mumps->id.ICNTL(2) = icntl; 1759 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr); 1760 if (flg) mumps->id.ICNTL(3) = icntl; 1761 1762 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr); 1763 if (flg) mumps->id.ICNTL(4) = icntl; 1764 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 1765 1766 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr); 1767 if (flg) mumps->id.ICNTL(6) = icntl; 1768 1769 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 
1=PETSc (sequential only), 3=Scotch, 4=PORD, 5=Metis, 7=auto(default)","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr); 1770 if (flg) { 1771 if (icntl== 1 && mumps->petsc_size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not supported by the PETSc/MUMPS interface for parallel matrices\n"); 1772 else mumps->id.ICNTL(7) = icntl; 1773 /* if ordering is passed into symbolic factorization routine then mumps->id.ICNTL(7) is automatically set to 1 so user need not set it.*/ 1774 } 1775 1776 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr); 1777 /* ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL);CHKERRQ(ierr); handled by MatSolveTranspose_MUMPS() */ 1778 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr); 1779 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr); 1780 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr); 1781 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr); 1782 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr); 1783 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr); 1784 if (mumps->id.ICNTL(19) <= 0 || 
mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 1785 ierr = MatDestroy(&F->schur);CHKERRQ(ierr); 1786 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 1787 } 1788 1789 /* MPICH Fortran MPI_IN_PLACE binding has a bug that prevents the use of 'mpi4py + mpich + mumps', e.g., by Firedrake. 1790 So we turn off distributed RHS for MPICH. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 1791 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 1792 */ 1793 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0) && defined(PETSC_HAVE_OMPI_MAJOR_VERSION) 1794 mumps->ICNTL20 = 10; /* Distributed dense RHS*/ 1795 #else 1796 mumps->ICNTL20 = 0; /* Centralized dense RHS*/ 1797 #endif 1798 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_20","ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides","None",mumps->ICNTL20,&mumps->ICNTL20,&flg);CHKERRQ(ierr); 1799 if (flg && mumps->ICNTL20 != 10 && mumps->ICNTL20 != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. 
Allowed values are 0, 10\n",(int)mumps->ICNTL20); 1800 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0) 1801 if (flg && mumps->ICNTL20 == 10) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=10 is not supported before MUMPS-5.3.0\n"); 1802 #endif 1803 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL);CHKERRQ(ierr); we only use distributed solution vector */ 1804 1805 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr); 1806 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr); 1807 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr); 1808 if (mumps->id.ICNTL(24)) { 1809 mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ 1810 } 1811 1812 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr); 1813 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr); 1814 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_27","ICNTL(27): controls the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr); 1815 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) 
ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr); 1816 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr); 1817 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */ 1818 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr); 1819 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr); -- not supported by PETSc API */ 1820 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr); 1821 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr); 1822 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);CHKERRQ(ierr); 1823 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);CHKERRQ(ierr); 1824 1825 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr); 1826 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr); 1827 ierr = 
PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr); 1828 ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr); 1829 ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr); 1830 ierr = PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);CHKERRQ(ierr); 1831 1832 ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL);CHKERRQ(ierr); 1833 1834 ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr); 1835 if (ninfo) { 1836 if (ninfo > 80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %d must <= 80\n",ninfo); 1837 ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr); 1838 mumps->ninfo = ninfo; 1839 for (i=0; i<ninfo; i++) { 1840 if (info[i] < 0 || info[i]>80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %d must between 1 and 80\n",ninfo); 1841 else mumps->info[i] = info[i]; 1842 } 1843 } 1844 1845 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1846 PetscFunctionReturn(0); 1847 } 1848 1849 PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps) 1850 { 1851 PetscErrorCode ierr; 1852 PetscInt nthreads=0; 1853 MPI_Comm newcomm=MPI_COMM_NULL; 1854 1855 PetscFunctionBegin; 1856 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1857 ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRMPI(ierr); 1858 ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRMPI(ierr);/* "if (!myid)" still works even if mumps_comm is different */ 1859 1860 ierr = 
PetscOptionsHasName(NULL,NULL,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr); 1861 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1862 ierr = PetscOptionsGetInt(NULL,NULL,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr); 1863 if (mumps->use_petsc_omp_support) { 1864 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1865 ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr); 1866 ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr); 1867 #else 1868 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -mat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual\n"); 1869 #endif 1870 } else { 1871 mumps->omp_comm = PETSC_COMM_SELF; 1872 mumps->mumps_comm = mumps->petsc_comm; 1873 mumps->is_omp_master = PETSC_TRUE; 1874 } 1875 ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRMPI(ierr); 1876 mumps->reqs = NULL; 1877 mumps->tag = 0; 1878 1879 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */ 1880 if (mumps->mumps_comm != MPI_COMM_NULL) { 1881 ierr = MPI_Comm_dup(mumps->mumps_comm,&newcomm);CHKERRMPI(ierr); 1882 mumps->mumps_comm = newcomm; 1883 } 1884 1885 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 1886 mumps->id.job = JOB_INIT; 1887 mumps->id.par = 1; /* host participates factorizaton and solve */ 1888 mumps->id.sym = mumps->sym; 1889 1890 PetscMUMPS_c(mumps); 1891 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in PetscInitializeMUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1892 1893 /* copy MUMPS default control values from master to slaves. 
Although slaves do not call MUMPS, they may access these values in code. 1894 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 1895 */ 1896 ierr = MPI_Bcast(mumps->id.icntl,40,MPI_INT, 0,mumps->omp_comm);CHKERRMPI(ierr); 1897 ierr = MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);CHKERRMPI(ierr); 1898 1899 mumps->scat_rhs = NULL; 1900 mumps->scat_sol = NULL; 1901 1902 /* set PETSc-MUMPS default options - override MUMPS default */ 1903 mumps->id.ICNTL(3) = 0; 1904 mumps->id.ICNTL(4) = 0; 1905 if (mumps->petsc_size == 1) { 1906 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 1907 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 1908 } else { 1909 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 1910 mumps->id.ICNTL(21) = 1; /* distributed solution */ 1911 } 1912 1913 /* schur */ 1914 mumps->id.size_schur = 0; 1915 mumps->id.listvar_schur = NULL; 1916 mumps->id.schur = NULL; 1917 mumps->sizeredrhs = 0; 1918 mumps->schur_sol = NULL; 1919 mumps->schur_sizesol = 0; 1920 PetscFunctionReturn(0); 1921 } 1922 1923 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps) 1924 { 1925 PetscErrorCode ierr; 1926 1927 PetscFunctionBegin; 1928 if (mumps->id.INFOG(1) < 0) { 1929 if (A->erroriffailure) { 1930 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1931 } else { 1932 if (mumps->id.INFOG(1) == -6) { 1933 ierr = PetscInfo2(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1934 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 1935 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 1936 ierr = PetscInfo2(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1937 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 
1938 } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) { 1939 ierr = PetscInfo(F,"Empty matrix\n");CHKERRQ(ierr); 1940 } else { 1941 ierr = PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1942 F->factorerrortype = MAT_FACTOR_OTHER; 1943 } 1944 } 1945 } 1946 PetscFunctionReturn(0); 1947 } 1948 1949 /* Note Petsc r(=c) permutation is used when mumps->id.ICNTL(7)==1 with centralized assembled matrix input; otherwise r and c are ignored */ 1950 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1951 { 1952 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1953 PetscErrorCode ierr; 1954 Vec b; 1955 const PetscInt M = A->rmap->N; 1956 1957 PetscFunctionBegin; 1958 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1959 1960 /* Set MUMPS options from the options database */ 1961 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1962 1963 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr); 1964 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1965 1966 /* analysis phase */ 1967 /*----------------*/ 1968 mumps->id.job = JOB_FACTSYMBOLIC; 1969 mumps->id.n = M; 1970 switch (mumps->id.ICNTL(18)) { 1971 case 0: /* centralized assembled matrix input */ 1972 if (!mumps->myid) { 1973 mumps->id.nnz = mumps->nnz; 1974 mumps->id.irn = mumps->irn; 1975 mumps->id.jcn = mumps->jcn; 1976 if (mumps->id.ICNTL(6)>1) mumps->id.a = (MumpsScalar*)mumps->val; 1977 if (r) { 1978 mumps->id.ICNTL(7) = 1; 1979 if (!mumps->myid) { 1980 const PetscInt *idx; 1981 PetscInt i; 1982 1983 ierr = PetscMalloc1(M,&mumps->id.perm_in);CHKERRQ(ierr); 1984 ierr = ISGetIndices(r,&idx);CHKERRQ(ierr); 1985 for (i=0; i<M; i++) {ierr = PetscMUMPSIntCast(idx[i]+1,&(mumps->id.perm_in[i]));CHKERRQ(ierr);} /* perm_in[]: start from 1, not 0! 
*/ 1986 ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr); 1987 } 1988 } 1989 } 1990 break; 1991 case 3: /* distributed assembled matrix input (size>1) */ 1992 mumps->id.nnz_loc = mumps->nnz; 1993 mumps->id.irn_loc = mumps->irn; 1994 mumps->id.jcn_loc = mumps->jcn; 1995 if (mumps->id.ICNTL(6)>1) mumps->id.a_loc = (MumpsScalar*)mumps->val; 1996 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1997 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1998 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1999 ierr = VecDestroy(&b);CHKERRQ(ierr); 2000 } 2001 break; 2002 } 2003 PetscMUMPS_c(mumps); 2004 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 2005 2006 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2007 F->ops->solve = MatSolve_MUMPS; 2008 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2009 F->ops->matsolve = MatMatSolve_MUMPS; 2010 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2011 PetscFunctionReturn(0); 2012 } 2013 2014 /* Note the Petsc r and c permutations are ignored */ 2015 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 2016 { 2017 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 2018 PetscErrorCode ierr; 2019 Vec b; 2020 const PetscInt M = A->rmap->N; 2021 2022 PetscFunctionBegin; 2023 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 2024 2025 /* Set MUMPS options from the options database */ 2026 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 2027 2028 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr); 2029 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 2030 2031 /* analysis phase */ 2032 /*----------------*/ 2033 mumps->id.job = JOB_FACTSYMBOLIC; 2034 mumps->id.n = M; 2035 switch (mumps->id.ICNTL(18)) { 2036 case 0: /* centralized assembled matrix input */ 2037 if (!mumps->myid) { 2038 mumps->id.nnz = mumps->nnz; 2039 mumps->id.irn = 
mumps->irn; 2040 mumps->id.jcn = mumps->jcn; 2041 if (mumps->id.ICNTL(6)>1) { 2042 mumps->id.a = (MumpsScalar*)mumps->val; 2043 } 2044 } 2045 break; 2046 case 3: /* distributed assembled matrix input (size>1) */ 2047 mumps->id.nnz_loc = mumps->nnz; 2048 mumps->id.irn_loc = mumps->irn; 2049 mumps->id.jcn_loc = mumps->jcn; 2050 if (mumps->id.ICNTL(6)>1) { 2051 mumps->id.a_loc = (MumpsScalar*)mumps->val; 2052 } 2053 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2054 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 2055 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 2056 ierr = VecDestroy(&b);CHKERRQ(ierr); 2057 } 2058 break; 2059 } 2060 PetscMUMPS_c(mumps); 2061 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 2062 2063 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2064 F->ops->solve = MatSolve_MUMPS; 2065 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2066 PetscFunctionReturn(0); 2067 } 2068 2069 /* Note the Petsc r permutation and factor info are ignored */ 2070 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info) 2071 { 2072 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 2073 PetscErrorCode ierr; 2074 Vec b; 2075 const PetscInt M = A->rmap->N; 2076 2077 PetscFunctionBegin; 2078 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 2079 2080 /* Set MUMPS options from the options database */ 2081 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 2082 2083 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr); 2084 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 2085 2086 /* analysis phase */ 2087 /*----------------*/ 2088 mumps->id.job = JOB_FACTSYMBOLIC; 2089 mumps->id.n = M; 2090 switch (mumps->id.ICNTL(18)) { 2091 case 0: /* centralized assembled matrix input */ 2092 if (!mumps->myid) { 2093 mumps->id.nnz = mumps->nnz; 2094 mumps->id.irn = mumps->irn; 2095 
mumps->id.jcn = mumps->jcn; 2096 if (mumps->id.ICNTL(6)>1) { 2097 mumps->id.a = (MumpsScalar*)mumps->val; 2098 } 2099 } 2100 break; 2101 case 3: /* distributed assembled matrix input (size>1) */ 2102 mumps->id.nnz_loc = mumps->nnz; 2103 mumps->id.irn_loc = mumps->irn; 2104 mumps->id.jcn_loc = mumps->jcn; 2105 if (mumps->id.ICNTL(6)>1) { 2106 mumps->id.a_loc = (MumpsScalar*)mumps->val; 2107 } 2108 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2109 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 2110 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 2111 ierr = VecDestroy(&b);CHKERRQ(ierr); 2112 } 2113 break; 2114 } 2115 PetscMUMPS_c(mumps); 2116 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 2117 2118 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2119 F->ops->solve = MatSolve_MUMPS; 2120 F->ops->solvetranspose = MatSolve_MUMPS; 2121 F->ops->matsolve = MatMatSolve_MUMPS; 2122 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2123 #if defined(PETSC_USE_COMPLEX) 2124 F->ops->getinertia = NULL; 2125 #else 2126 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2127 #endif 2128 PetscFunctionReturn(0); 2129 } 2130 2131 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) 2132 { 2133 PetscErrorCode ierr; 2134 PetscBool iascii; 2135 PetscViewerFormat format; 2136 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 2137 2138 PetscFunctionBegin; 2139 /* check if matrix is mumps type */ 2140 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0); 2141 2142 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 2143 if (iascii) { 2144 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 2145 if (format == PETSC_VIEWER_ASCII_INFO) { 2146 ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr); 2147 ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d\n",mumps->id.sym);CHKERRQ(ierr); 2148 
ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d\n",mumps->id.par);CHKERRQ(ierr); 2149 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d\n",mumps->id.ICNTL(1));CHKERRQ(ierr); 2150 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d\n",mumps->id.ICNTL(2));CHKERRQ(ierr); 2151 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d\n",mumps->id.ICNTL(3));CHKERRQ(ierr); 2152 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d\n",mumps->id.ICNTL(4));CHKERRQ(ierr); 2153 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d\n",mumps->id.ICNTL(5));CHKERRQ(ierr); 2154 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d\n",mumps->id.ICNTL(6));CHKERRQ(ierr); 2155 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d\n",mumps->id.ICNTL(7));CHKERRQ(ierr); 2156 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d\n",mumps->id.ICNTL(8));CHKERRQ(ierr); 2157 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d\n",mumps->id.ICNTL(10));CHKERRQ(ierr); 2158 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d\n",mumps->id.ICNTL(11));CHKERRQ(ierr); 2159 if (mumps->id.ICNTL(11)>0) { 2160 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr); 2161 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr); 2162 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6));CHKERRQ(ierr); 2163 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr); 2164 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr); 2165 ierr = PetscViewerASCIIPrintf(viewer," 
RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr); 2166 } 2167 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d\n",mumps->id.ICNTL(12));CHKERRQ(ierr); 2168 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (sequential factorization of the root node): %d\n",mumps->id.ICNTL(13));CHKERRQ(ierr); 2169 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d\n",mumps->id.ICNTL(14));CHKERRQ(ierr); 2170 /* ICNTL(15-17) not used */ 2171 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d\n",mumps->id.ICNTL(18));CHKERRQ(ierr); 2172 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d\n",mumps->id.ICNTL(19));CHKERRQ(ierr); 2173 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(20) (RHS sparse pattern): %d\n",mumps->id.ICNTL(20));CHKERRQ(ierr); 2174 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d\n",mumps->id.ICNTL(21));CHKERRQ(ierr); 2175 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d\n",mumps->id.ICNTL(22));CHKERRQ(ierr); 2176 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory can be allocated locally):%d\n",mumps->id.ICNTL(23));CHKERRQ(ierr); 2177 2178 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d\n",mumps->id.ICNTL(24));CHKERRQ(ierr); 2179 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d\n",mumps->id.ICNTL(25));CHKERRQ(ierr); 2180 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for RHS or solution): %d\n",mumps->id.ICNTL(26));CHKERRQ(ierr); 2181 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(27) (blocking size for multiple RHS): %d\n",mumps->id.ICNTL(27));CHKERRQ(ierr); 2182 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d\n",mumps->id.ICNTL(28));CHKERRQ(ierr); 2183 ierr = PetscViewerASCIIPrintf(viewer," 
ICNTL(29) (parallel ordering): %d\n",mumps->id.ICNTL(29));CHKERRQ(ierr); 2184 2185 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d\n",mumps->id.ICNTL(30));CHKERRQ(ierr); 2186 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(31) (factors is discarded in the solve phase): %d\n",mumps->id.ICNTL(31));CHKERRQ(ierr); 2187 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d\n",mumps->id.ICNTL(33));CHKERRQ(ierr); 2188 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d\n",mumps->id.ICNTL(35));CHKERRQ(ierr); 2189 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(36) (choice of BLR factorization variant): %d\n",mumps->id.ICNTL(36));CHKERRQ(ierr); 2190 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(38) (estimated compression rate of LU factors): %d\n",mumps->id.ICNTL(38));CHKERRQ(ierr); 2191 2192 ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1));CHKERRQ(ierr); 2193 ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr); 2194 ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3));CHKERRQ(ierr); 2195 ierr = PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4));CHKERRQ(ierr); 2196 ierr = PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5));CHKERRQ(ierr); 2197 ierr = PetscViewerASCIIPrintf(viewer," CNTL(7) (dropping parameter for BLR): %g \n",mumps->id.CNTL(7));CHKERRQ(ierr); 2198 2199 /* infomation local to each processor */ 2200 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr); 2201 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 2202 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr); 
2203 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2204 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr); 2205 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr); 2206 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2207 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr); 2208 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr); 2209 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2210 2211 ierr = PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr); 2212 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr); 2213 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2214 2215 ierr = PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr); 2216 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr); 2217 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2218 2219 ierr = PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr); 2220 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr); 2221 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2222 2223 if (mumps->ninfo && mumps->ninfo <= 80){ 2224 PetscInt i; 2225 for (i=0; i<mumps->ninfo; i++){ 2226 ierr = PetscViewerASCIIPrintf(viewer, " INFO(%d): \n",mumps->info[i]);CHKERRQ(ierr); 2227 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(mumps->info[i]));CHKERRQ(ierr); 2228 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2229 
} 2230 } 2231 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 2232 2233 if (!mumps->myid) { /* information from the host */ 2234 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr); 2235 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr); 2236 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr); 2237 ierr = PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr); 2238 2239 ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(3));CHKERRQ(ierr); 2240 ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(4));CHKERRQ(ierr); 2241 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d\n",mumps->id.INFOG(5));CHKERRQ(ierr); 2242 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d\n",mumps->id.INFOG(6));CHKERRQ(ierr); 2243 ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively used after analysis): %d\n",mumps->id.INFOG(7));CHKERRQ(ierr); 2244 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n",mumps->id.INFOG(8));CHKERRQ(ierr); 2245 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n",mumps->id.INFOG(9));CHKERRQ(ierr); 2246 ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store 
the matrix factors after factorization): %d\n",mumps->id.INFOG(10));CHKERRQ(ierr); 2247 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d\n",mumps->id.INFOG(11));CHKERRQ(ierr); 2248 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d\n",mumps->id.INFOG(12));CHKERRQ(ierr); 2249 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d\n",mumps->id.INFOG(13));CHKERRQ(ierr); 2250 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d\n",mumps->id.INFOG(14));CHKERRQ(ierr); 2251 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d\n",mumps->id.INFOG(15));CHKERRQ(ierr); 2252 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n",mumps->id.INFOG(16));CHKERRQ(ierr); 2253 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n",mumps->id.INFOG(17));CHKERRQ(ierr); 2254 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n",mumps->id.INFOG(18));CHKERRQ(ierr); 2255 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n",mumps->id.INFOG(19));CHKERRQ(ierr); 2256 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d\n",mumps->id.INFOG(20));CHKERRQ(ierr); 2257 ierr = PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n",mumps->id.INFOG(21));CHKERRQ(ierr); 2258 ierr = 
PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n",mumps->id.INFOG(22));CHKERRQ(ierr); 2259 ierr = PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n",mumps->id.INFOG(23));CHKERRQ(ierr); 2260 ierr = PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n",mumps->id.INFOG(24));CHKERRQ(ierr); 2261 ierr = PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n",mumps->id.INFOG(25));CHKERRQ(ierr); 2262 ierr = PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));CHKERRQ(ierr); 2263 ierr = PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));CHKERRQ(ierr); 2264 ierr = PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));CHKERRQ(ierr); 2265 ierr = PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));CHKERRQ(ierr); 2266 ierr = PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));CHKERRQ(ierr); 2267 ierr = PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));CHKERRQ(ierr); 2268 ierr = PetscViewerASCIIPrintf(viewer," INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));CHKERRQ(ierr); 2269 ierr = PetscViewerASCIIPrintf(viewer," INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): 
%d\n",mumps->id.INFOG(36));CHKERRQ(ierr); 2270 ierr = PetscViewerASCIIPrintf(viewer," INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n",mumps->id.INFOG(37));CHKERRQ(ierr); 2271 ierr = PetscViewerASCIIPrintf(viewer," INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(38));CHKERRQ(ierr); 2272 ierr = PetscViewerASCIIPrintf(viewer," INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n",mumps->id.INFOG(39));CHKERRQ(ierr); 2273 } 2274 } 2275 } 2276 PetscFunctionReturn(0); 2277 } 2278 2279 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info) 2280 { 2281 Mat_MUMPS *mumps =(Mat_MUMPS*)A->data; 2282 2283 PetscFunctionBegin; 2284 info->block_size = 1.0; 2285 info->nz_allocated = mumps->id.INFOG(20); 2286 info->nz_used = mumps->id.INFOG(20); 2287 info->nz_unneeded = 0.0; 2288 info->assemblies = 0.0; 2289 info->mallocs = 0.0; 2290 info->memory = 0.0; 2291 info->fill_ratio_given = 0; 2292 info->fill_ratio_needed = 0; 2293 info->factor_mallocs = 0; 2294 PetscFunctionReturn(0); 2295 } 2296 2297 /* -------------------------------------------------------------------------------------------*/ 2298 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 2299 { 2300 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2301 const PetscScalar *arr; 2302 const PetscInt *idxs; 2303 PetscInt size,i; 2304 PetscErrorCode ierr; 2305 2306 PetscFunctionBegin; 2307 ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr); 2308 if (mumps->petsc_size > 1) { 2309 PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */ 2310 2311 ls = mumps->myid ? (size ? 
PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 2312 ierr = MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);CHKERRMPI(ierr); 2313 if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc\n"); 2314 } 2315 2316 /* Schur complement matrix */ 2317 ierr = MatDestroy(&F->schur);CHKERRQ(ierr); 2318 ierr = MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur);CHKERRQ(ierr); 2319 ierr = MatDenseGetArrayRead(F->schur,&arr);CHKERRQ(ierr); 2320 mumps->id.schur = (MumpsScalar*)arr; 2321 mumps->id.size_schur = size; 2322 mumps->id.schur_lld = size; 2323 ierr = MatDenseRestoreArrayRead(F->schur,&arr);CHKERRQ(ierr); 2324 if (mumps->sym == 1) { 2325 ierr = MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 2326 } 2327 2328 /* MUMPS expects Fortran style indices */ 2329 ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr); 2330 ierr = PetscMalloc1(size,&mumps->id.listvar_schur);CHKERRQ(ierr); 2331 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 2332 for (i=0; i<size; i++) {ierr = PetscMUMPSIntCast(idxs[i]+1,&(mumps->id.listvar_schur[i]));CHKERRQ(ierr);} 2333 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 2334 if (mumps->petsc_size > 1) { 2335 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2336 } else { 2337 if (F->factortype == MAT_FACTOR_LU) { 2338 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2339 } else { 2340 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2341 } 2342 } 2343 /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 2344 mumps->id.ICNTL(26) = -1; 2345 PetscFunctionReturn(0); 2346 } 2347 2348 /* -------------------------------------------------------------------------------------------*/ 2349 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S) 2350 { 2351 Mat St; 2352 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2353 
PetscScalar *array; 2354 #if defined(PETSC_USE_COMPLEX) 2355 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 2356 #endif 2357 PetscErrorCode ierr; 2358 2359 PetscFunctionBegin; 2360 if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 2361 ierr = MatCreate(PETSC_COMM_SELF,&St);CHKERRQ(ierr); 2362 ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);CHKERRQ(ierr); 2363 ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr); 2364 ierr = MatSetUp(St);CHKERRQ(ierr); 2365 ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr); 2366 if (!mumps->sym) { /* MUMPS always return a full matrix */ 2367 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2368 PetscInt i,j,N=mumps->id.size_schur; 2369 for (i=0;i<N;i++) { 2370 for (j=0;j<N;j++) { 2371 #if !defined(PETSC_USE_COMPLEX) 2372 PetscScalar val = mumps->id.schur[i*N+j]; 2373 #else 2374 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2375 #endif 2376 array[j*N+i] = val; 2377 } 2378 } 2379 } else { /* stored by columns */ 2380 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 2381 } 2382 } else { /* either full or lower-triangular (not packed) */ 2383 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2384 PetscInt i,j,N=mumps->id.size_schur; 2385 for (i=0;i<N;i++) { 2386 for (j=i;j<N;j++) { 2387 #if !defined(PETSC_USE_COMPLEX) 2388 PetscScalar val = mumps->id.schur[i*N+j]; 2389 #else 2390 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2391 #endif 2392 array[i*N+j] = val; 2393 array[j*N+i] = val; 2394 } 2395 } 2396 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2397 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 2398 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2399 PetscInt 
i,j,N=mumps->id.size_schur; 2400 for (i=0;i<N;i++) { 2401 for (j=0;j<i+1;j++) { 2402 #if !defined(PETSC_USE_COMPLEX) 2403 PetscScalar val = mumps->id.schur[i*N+j]; 2404 #else 2405 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2406 #endif 2407 array[i*N+j] = val; 2408 array[j*N+i] = val; 2409 } 2410 } 2411 } 2412 } 2413 ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr); 2414 *S = St; 2415 PetscFunctionReturn(0); 2416 } 2417 2418 /* -------------------------------------------------------------------------------------------*/ 2419 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival) 2420 { 2421 PetscErrorCode ierr; 2422 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2423 2424 PetscFunctionBegin; 2425 ierr = PetscMUMPSIntCast(ival,&mumps->id.ICNTL(icntl));CHKERRQ(ierr); 2426 PetscFunctionReturn(0); 2427 } 2428 2429 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival) 2430 { 2431 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2432 2433 PetscFunctionBegin; 2434 *ival = mumps->id.ICNTL(icntl); 2435 PetscFunctionReturn(0); 2436 } 2437 2438 /*@ 2439 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2440 2441 Logically Collective on Mat 2442 2443 Input Parameters: 2444 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2445 . icntl - index of MUMPS parameter array ICNTL() 2446 - ival - value of MUMPS ICNTL(icntl) 2447 2448 Options Database: 2449 . -mat_mumps_icntl_<icntl> <ival> 2450 2451 Level: beginner 2452 2453 References: 2454 . 
MUMPS Users' Guide 2455 2456 .seealso: MatGetFactor(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2457 @*/ 2458 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival) 2459 { 2460 PetscErrorCode ierr; 2461 2462 PetscFunctionBegin; 2463 PetscValidType(F,1); 2464 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2465 PetscValidLogicalCollectiveInt(F,icntl,2); 2466 PetscValidLogicalCollectiveInt(F,ival,3); 2467 ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr); 2468 PetscFunctionReturn(0); 2469 } 2470 2471 /*@ 2472 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2473 2474 Logically Collective on Mat 2475 2476 Input Parameters: 2477 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2478 - icntl - index of MUMPS parameter array ICNTL() 2479 2480 Output Parameter: 2481 . ival - value of MUMPS ICNTL(icntl) 2482 2483 Level: beginner 2484 2485 References: 2486 . 
MUMPS Users' Guide 2487 2488 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2489 @*/ 2490 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival) 2491 { 2492 PetscErrorCode ierr; 2493 2494 PetscFunctionBegin; 2495 PetscValidType(F,1); 2496 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2497 PetscValidLogicalCollectiveInt(F,icntl,2); 2498 PetscValidIntPointer(ival,3); 2499 ierr = PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2500 PetscFunctionReturn(0); 2501 } 2502 2503 /* -------------------------------------------------------------------------------------------*/ 2504 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val) 2505 { 2506 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2507 2508 PetscFunctionBegin; 2509 mumps->id.CNTL(icntl) = val; 2510 PetscFunctionReturn(0); 2511 } 2512 2513 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val) 2514 { 2515 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2516 2517 PetscFunctionBegin; 2518 *val = mumps->id.CNTL(icntl); 2519 PetscFunctionReturn(0); 2520 } 2521 2522 /*@ 2523 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2524 2525 Logically Collective on Mat 2526 2527 Input Parameters: 2528 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2529 . icntl - index of MUMPS parameter array CNTL() 2530 - val - value of MUMPS CNTL(icntl) 2531 2532 Options Database: 2533 . -mat_mumps_cntl_<icntl> <val> 2534 2535 Level: beginner 2536 2537 References: 2538 . 
MUMPS Users' Guide 2539 2540 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2541 @*/ 2542 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val) 2543 { 2544 PetscErrorCode ierr; 2545 2546 PetscFunctionBegin; 2547 PetscValidType(F,1); 2548 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2549 PetscValidLogicalCollectiveInt(F,icntl,2); 2550 PetscValidLogicalCollectiveReal(F,val,3); 2551 ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr); 2552 PetscFunctionReturn(0); 2553 } 2554 2555 /*@ 2556 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2557 2558 Logically Collective on Mat 2559 2560 Input Parameters: 2561 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2562 - icntl - index of MUMPS parameter array CNTL() 2563 2564 Output Parameter: 2565 . val - value of MUMPS CNTL(icntl) 2566 2567 Level: beginner 2568 2569 References: 2570 . 
MUMPS Users' Guide 2571 2572 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2573 @*/ 2574 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val) 2575 { 2576 PetscErrorCode ierr; 2577 2578 PetscFunctionBegin; 2579 PetscValidType(F,1); 2580 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2581 PetscValidLogicalCollectiveInt(F,icntl,2); 2582 PetscValidRealPointer(val,3); 2583 ierr = PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2584 PetscFunctionReturn(0); 2585 } 2586 2587 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info) 2588 { 2589 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2590 2591 PetscFunctionBegin; 2592 *info = mumps->id.INFO(icntl); 2593 PetscFunctionReturn(0); 2594 } 2595 2596 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog) 2597 { 2598 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2599 2600 PetscFunctionBegin; 2601 *infog = mumps->id.INFOG(icntl); 2602 PetscFunctionReturn(0); 2603 } 2604 2605 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo) 2606 { 2607 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2608 2609 PetscFunctionBegin; 2610 *rinfo = mumps->id.RINFO(icntl); 2611 PetscFunctionReturn(0); 2612 } 2613 2614 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog) 2615 { 2616 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2617 2618 PetscFunctionBegin; 2619 *rinfog = mumps->id.RINFOG(icntl); 2620 PetscFunctionReturn(0); 2621 } 2622 2623 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS) 2624 { 2625 PetscErrorCode ierr; 2626 Mat Bt = NULL,Btseq = NULL; 2627 PetscBool flg; 2628 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2629 PetscScalar *aa; 2630 PetscInt spnr,*ia,*ja,M,nrhs; 2631 2632 PetscFunctionBegin; 2633 PetscValidIntPointer(spRHS,2); 2634 
ierr = PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);CHKERRQ(ierr); 2635 if (flg) { 2636 ierr = MatTransposeGetMat(spRHS,&Bt);CHKERRQ(ierr); 2637 } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be type MATTRANSPOSEMAT matrix"); 2638 2639 ierr = MatMumpsSetIcntl(F,30,1);CHKERRQ(ierr); 2640 2641 if (mumps->petsc_size > 1) { 2642 Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data; 2643 Btseq = b->A; 2644 } else { 2645 Btseq = Bt; 2646 } 2647 2648 ierr = MatGetSize(spRHS,&M,&nrhs);CHKERRQ(ierr); 2649 mumps->id.nrhs = nrhs; 2650 mumps->id.lrhs = M; 2651 mumps->id.rhs = NULL; 2652 2653 if (!mumps->myid) { 2654 ierr = MatSeqAIJGetArray(Btseq,&aa);CHKERRQ(ierr); 2655 ierr = MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2656 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2657 ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr); 2658 mumps->id.rhs_sparse = (MumpsScalar*)aa; 2659 } else { 2660 mumps->id.irhs_ptr = NULL; 2661 mumps->id.irhs_sparse = NULL; 2662 mumps->id.nz_rhs = 0; 2663 mumps->id.rhs_sparse = NULL; 2664 } 2665 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2666 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2667 2668 /* solve phase */ 2669 /*-------------*/ 2670 mumps->id.job = JOB_SOLVE; 2671 PetscMUMPS_c(mumps); 2672 if (mumps->id.INFOG(1) < 0) 2673 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 2674 2675 if (!mumps->myid) { 2676 ierr = MatSeqAIJRestoreArray(Btseq,&aa);CHKERRQ(ierr); 2677 ierr = MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2678 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2679 } 2680 
PetscFunctionReturn(0); 2681 } 2682 2683 /*@ 2684 MatMumpsGetInverse - Get user-specified set of entries in inverse of A 2685 2686 Logically Collective on Mat 2687 2688 Input Parameters: 2689 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2690 - spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0] 2691 2692 Output Parameter: 2693 . spRHS - requested entries of inverse of A 2694 2695 Level: beginner 2696 2697 References: 2698 . MUMPS Users' Guide 2699 2700 .seealso: MatGetFactor(), MatCreateTranspose() 2701 @*/ 2702 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS) 2703 { 2704 PetscErrorCode ierr; 2705 2706 PetscFunctionBegin; 2707 PetscValidType(F,1); 2708 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2709 ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr); 2710 PetscFunctionReturn(0); 2711 } 2712 2713 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST) 2714 { 2715 PetscErrorCode ierr; 2716 Mat spRHS; 2717 2718 PetscFunctionBegin; 2719 ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr); 2720 ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr); 2721 ierr = MatDestroy(&spRHS);CHKERRQ(ierr); 2722 PetscFunctionReturn(0); 2723 } 2724 2725 /*@ 2726 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T 2727 2728 Logically Collective on Mat 2729 2730 Input Parameters: 2731 + F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface 2732 - spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0] 2733 2734 Output Parameter: 2735 . spRHST - requested entries of inverse of A^T 2736 2737 Level: beginner 2738 2739 References: 2740 . 
MUMPS Users' Guide 2741 2742 .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse() 2743 @*/ 2744 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST) 2745 { 2746 PetscErrorCode ierr; 2747 PetscBool flg; 2748 2749 PetscFunctionBegin; 2750 PetscValidType(F,1); 2751 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2752 ierr = PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 2753 if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix"); 2754 2755 ierr = PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));CHKERRQ(ierr); 2756 PetscFunctionReturn(0); 2757 } 2758 2759 /*@ 2760 MatMumpsGetInfo - Get MUMPS parameter INFO() 2761 2762 Logically Collective on Mat 2763 2764 Input Parameters: 2765 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2766 - icntl - index of MUMPS parameter array INFO() 2767 2768 Output Parameter: 2769 . ival - value of MUMPS INFO(icntl) 2770 2771 Level: beginner 2772 2773 References: 2774 . 
MUMPS Users' Guide 2775 2776 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2777 @*/ 2778 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival) 2779 { 2780 PetscErrorCode ierr; 2781 2782 PetscFunctionBegin; 2783 PetscValidType(F,1); 2784 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2785 PetscValidIntPointer(ival,3); 2786 ierr = PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2787 PetscFunctionReturn(0); 2788 } 2789 2790 /*@ 2791 MatMumpsGetInfog - Get MUMPS parameter INFOG() 2792 2793 Logically Collective on Mat 2794 2795 Input Parameters: 2796 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2797 - icntl - index of MUMPS parameter array INFOG() 2798 2799 Output Parameter: 2800 . ival - value of MUMPS INFOG(icntl) 2801 2802 Level: beginner 2803 2804 References: 2805 . 
MUMPS Users' Guide 2806 2807 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2808 @*/ 2809 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival) 2810 { 2811 PetscErrorCode ierr; 2812 2813 PetscFunctionBegin; 2814 PetscValidType(F,1); 2815 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2816 PetscValidIntPointer(ival,3); 2817 ierr = PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2818 PetscFunctionReturn(0); 2819 } 2820 2821 /*@ 2822 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 2823 2824 Logically Collective on Mat 2825 2826 Input Parameters: 2827 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2828 - icntl - index of MUMPS parameter array RINFO() 2829 2830 Output Parameter: 2831 . val - value of MUMPS RINFO(icntl) 2832 2833 Level: beginner 2834 2835 References: 2836 . 
MUMPS Users' Guide 2837 2838 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfog() 2839 @*/ 2840 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val) 2841 { 2842 PetscErrorCode ierr; 2843 2844 PetscFunctionBegin; 2845 PetscValidType(F,1); 2846 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2847 PetscValidRealPointer(val,3); 2848 ierr = PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2849 PetscFunctionReturn(0); 2850 } 2851 2852 /*@ 2853 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 2854 2855 Logically Collective on Mat 2856 2857 Input Parameters: 2858 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2859 - icntl - index of MUMPS parameter array RINFOG() 2860 2861 Output Parameter: 2862 . val - value of MUMPS RINFOG(icntl) 2863 2864 Level: beginner 2865 2866 References: 2867 . MUMPS Users' Guide 2868 2869 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo() 2870 @*/ 2871 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val) 2872 { 2873 PetscErrorCode ierr; 2874 2875 PetscFunctionBegin; 2876 PetscValidType(F,1); 2877 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2878 PetscValidRealPointer(val,3); 2879 ierr = PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2880 PetscFunctionReturn(0); 2881 } 2882 2883 /*MC 2884 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 2885 distributed and sequential matrices via the external package MUMPS. 
2886 2887 Works with MATAIJ and MATSBAIJ matrices 2888 2889 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 2890 2891 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below. 2892 2893 Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver 2894 2895 Options Database Keys: 2896 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 2897 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 2898 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 2899 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 2900 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 2901 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 1=PETSc (sequential only) 3=Scotch, 4=PORD, 5=Metis 2902 -pc_factor_mat_ordering_type <type> will automatically provide the ordering so setting this flag to 1 is not needed 2903 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 2904 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 2905 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 2906 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 2907 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 2908 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 2909 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 2910 . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS 2911 . 
-mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
.  -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can be allocated per processor
.  -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
.  -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis
.  -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix
.  -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering
.  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
.  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
.  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
.  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
.  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
.  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
.  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
.  -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold
.  -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement
.  -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
.  -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
.  -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
.  -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
-  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
                                    Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.

   If run sequentially, one can use the PETSc provided ordering with the option -mat_mumps_icntl_7 1

   Level: beginner

   Notes:
    MUMPS Cholesky does not handle (complex) Hermitian matrices http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf so using it will error if the matrix is Hermitian.

    When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling
$          KSPGetPC(ksp,&pc);
$          PCFactorGetMatrix(pc,&mat);
$          MatMumpsGetInfo(mat,....);
$          MatMumpsGetInfog(mat,....); etc.
    Or you can run with -ksp_error_if_not_converged and the program will be stopped and the information printed in the error message.

   Two modes to run MUMPS/PETSc with OpenMP

$     Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
$     threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".

$     -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
$     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"

   To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
   (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc
   (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
   libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
   (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).
2960 2961 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 2962 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 2963 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 2964 are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set 2965 by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs. 2966 In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets, 2967 if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind 2968 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half 2969 cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 2970 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 2971 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 2972 examine the mapping result. 2973 2974 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts, 2975 for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc 2976 calls omp_set_num_threads(m) internally before calling MUMPS. 
2977 2978 References: 2979 + 1. - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 2980 - 2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017. 2981 2982 .seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCGetFactor(), PCFactorGetMatrix() 2983 2984 M*/ 2985 2986 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type) 2987 { 2988 PetscFunctionBegin; 2989 *type = MATSOLVERMUMPS; 2990 PetscFunctionReturn(0); 2991 } 2992 2993 /* MatGetFactor for Seq and MPI AIJ matrices */ 2994 static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F) 2995 { 2996 Mat B; 2997 PetscErrorCode ierr; 2998 Mat_MUMPS *mumps; 2999 PetscBool isSeqAIJ; 3000 PetscMPIInt size; 3001 3002 PetscFunctionBegin; 3003 #if defined(PETSC_USE_COMPLEX) 3004 if (A->hermitian && !A->symmetric && ftype == MAT_FACTOR_CHOLESKY) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported"); 3005 #endif 3006 /* Create the factorization matrix */ 3007 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr); 3008 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3009 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3010 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 3011 ierr = MatSetUp(B);CHKERRQ(ierr); 3012 3013 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 3014 3015 B->ops->view = MatView_MUMPS; 3016 B->ops->getinfo = MatGetInfo_MUMPS; 3017 3018 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 3019 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 3020 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 3021 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 3022 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 3023 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 3024 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 3025 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 3026 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 3027 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 3028 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 3029 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 3030 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 3031 3032 if (ftype == MAT_FACTOR_LU) { 3033 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3034 B->factortype = MAT_FACTOR_LU; 3035 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3036 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 3037 ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]);CHKERRQ(ierr); 3038 mumps->sym = 0; 3039 } else { 3040 
B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3041 B->factortype = MAT_FACTOR_CHOLESKY; 3042 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3043 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 3044 ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY]);CHKERRQ(ierr); 3045 #if defined(PETSC_USE_COMPLEX) 3046 mumps->sym = 2; 3047 #else 3048 if (A->spd_set && A->spd) mumps->sym = 1; 3049 else mumps->sym = 2; 3050 #endif 3051 } 3052 3053 /* set solvertype */ 3054 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 3055 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 3056 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 3057 if (size == 1) { 3058 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3059 B->canuseordering = PETSC_TRUE; 3060 } 3061 B->ops->destroy = MatDestroy_MUMPS; 3062 B->data = (void*)mumps; 3063 3064 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 3065 3066 *F = B; 3067 PetscFunctionReturn(0); 3068 } 3069 3070 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3071 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F) 3072 { 3073 Mat B; 3074 PetscErrorCode ierr; 3075 Mat_MUMPS *mumps; 3076 PetscBool isSeqSBAIJ; 3077 PetscMPIInt size; 3078 3079 PetscFunctionBegin; 3080 #if defined(PETSC_USE_COMPLEX) 3081 if (A->hermitian && !A->symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported"); 3082 #endif 3083 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3084 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3085 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 3086 ierr = MatSetUp(B);CHKERRQ(ierr); 3087 3088 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 3089 ierr = 
PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr); 3090 if (isSeqSBAIJ) { 3091 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3092 } else { 3093 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3094 } 3095 3096 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3097 B->ops->view = MatView_MUMPS; 3098 B->ops->getinfo = MatGetInfo_MUMPS; 3099 3100 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 3101 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 3102 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 3103 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 3104 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 3105 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 3106 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 3107 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 3108 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 3109 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 3110 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 3111 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 3112 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 3113 3114 B->factortype = 
MAT_FACTOR_CHOLESKY; 3115 #if defined(PETSC_USE_COMPLEX) 3116 mumps->sym = 2; 3117 #else 3118 if (A->spd_set && A->spd) mumps->sym = 1; 3119 else mumps->sym = 2; 3120 #endif 3121 3122 /* set solvertype */ 3123 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 3124 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 3125 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 3126 if (size == 1) { 3127 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3128 B->canuseordering = PETSC_TRUE; 3129 } 3130 ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY]);CHKERRQ(ierr); 3131 B->ops->destroy = MatDestroy_MUMPS; 3132 B->data = (void*)mumps; 3133 3134 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 3135 3136 *F = B; 3137 PetscFunctionReturn(0); 3138 } 3139 3140 static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F) 3141 { 3142 Mat B; 3143 PetscErrorCode ierr; 3144 Mat_MUMPS *mumps; 3145 PetscBool isSeqBAIJ; 3146 PetscMPIInt size; 3147 3148 PetscFunctionBegin; 3149 /* Create the factorization matrix */ 3150 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr); 3151 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3152 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3153 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 3154 ierr = MatSetUp(B);CHKERRQ(ierr); 3155 3156 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 3157 if (ftype == MAT_FACTOR_LU) { 3158 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3159 B->factortype = MAT_FACTOR_LU; 3160 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3161 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3162 mumps->sym = 0; 3163 ierr = 
PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]);CHKERRQ(ierr); 3164 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead\n"); 3165 3166 B->ops->view = MatView_MUMPS; 3167 B->ops->getinfo = MatGetInfo_MUMPS; 3168 3169 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 3170 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 3171 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 3172 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 3173 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 3174 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 3175 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 3176 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 3177 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 3178 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 3179 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 3180 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 3181 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 3182 3183 /* set solvertype */ 3184 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 3185 ierr = 
PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 3186 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 3187 if (size == 1) { 3188 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3189 B->canuseordering = PETSC_TRUE; 3190 } 3191 B->ops->destroy = MatDestroy_MUMPS; 3192 B->data = (void*)mumps; 3193 3194 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 3195 3196 *F = B; 3197 PetscFunctionReturn(0); 3198 } 3199 3200 /* MatGetFactor for Seq and MPI SELL matrices */ 3201 static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F) 3202 { 3203 Mat B; 3204 PetscErrorCode ierr; 3205 Mat_MUMPS *mumps; 3206 PetscBool isSeqSELL; 3207 PetscMPIInt size; 3208 3209 PetscFunctionBegin; 3210 /* Create the factorization matrix */ 3211 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);CHKERRQ(ierr); 3212 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3213 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3214 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 3215 ierr = MatSetUp(B);CHKERRQ(ierr); 3216 3217 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 3218 3219 B->ops->view = MatView_MUMPS; 3220 B->ops->getinfo = MatGetInfo_MUMPS; 3221 3222 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 3223 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 3224 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 3225 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 3226 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 3227 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 3228 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 3229 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 3230 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 3231 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 3232 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 3233 3234 if (ftype == MAT_FACTOR_LU) { 3235 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3236 B->factortype = MAT_FACTOR_LU; 3237 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 3238 else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 3239 mumps->sym = 0; 3240 ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]);CHKERRQ(ierr); 3241 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 3242 3243 /* set solvertype */ 3244 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 3245 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 3246 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 3247 if (size == 1) { 3248 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3249 B->canuseordering = PETSC_TRUE; 3250 } 3251 B->ops->destroy = MatDestroy_MUMPS; 3252 B->data = (void*)mumps; 3253 3254 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 3255 3256 *F = B; 3257 PetscFunctionReturn(0); 3258 } 3259 3260 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 3261 { 3262 PetscErrorCode ierr; 3263 3264 PetscFunctionBegin; 3265 ierr = 
MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 3266 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 3267 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 3268 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 3269 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 3270 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 3271 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 3272 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 3273 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 3274 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 3275 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);CHKERRQ(ierr); 3276 PetscFunctionReturn(0); 3277 } 3278 3279