1 2 /* 3 Provides an interface to the MUMPS sparse solver 4 */ 5 #include <petscpkg_version.h> 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 8 #include <../src/mat/impls/sell/mpi/mpisell.h> 9 10 EXTERN_C_BEGIN 11 #if defined(PETSC_USE_COMPLEX) 12 #if defined(PETSC_USE_REAL_SINGLE) 13 #include <cmumps_c.h> 14 #else 15 #include <zmumps_c.h> 16 #endif 17 #else 18 #if defined(PETSC_USE_REAL_SINGLE) 19 #include <smumps_c.h> 20 #else 21 #include <dmumps_c.h> 22 #endif 23 #endif 24 EXTERN_C_END 25 #define JOB_INIT -1 26 #define JOB_NULL 0 27 #define JOB_FACTSYMBOLIC 1 28 #define JOB_FACTNUMERIC 2 29 #define JOB_SOLVE 3 30 #define JOB_END -2 31 32 /* calls to MUMPS */ 33 #if defined(PETSC_USE_COMPLEX) 34 #if defined(PETSC_USE_REAL_SINGLE) 35 #define MUMPS_c cmumps_c 36 #else 37 #define MUMPS_c zmumps_c 38 #endif 39 #else 40 #if defined(PETSC_USE_REAL_SINGLE) 41 #define MUMPS_c smumps_c 42 #else 43 #define MUMPS_c dmumps_c 44 #endif 45 #endif 46 47 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for 48 number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the 49 naming convention in PetscMPIInt, PetscBLASInt etc. 50 */ 51 typedef MUMPS_INT PetscMUMPSInt; 52 53 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0) 54 #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */ 55 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 56 #endif 57 #else 58 #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */ 59 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 60 #endif 61 #endif 62 63 #define MPIU_MUMPSINT MPI_INT 64 #define PETSC_MUMPS_INT_MAX 2147483647 65 #define PETSC_MUMPS_INT_MIN -2147483648 66 67 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers*/ 68 static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a,PetscMUMPSInt *b) 69 { 70 PetscFunctionBegin; 71 #if PetscDefined(USE_64BIT_INDICES) 72 PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt"); 73 #endif 74 *b = (PetscMUMPSInt)(a); 75 PetscFunctionReturn(0); 76 } 77 78 /* Put these utility routines here since they are only used in this file */ 79 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject,const char opt[],const char text[],const char man[],PetscMUMPSInt currentvalue,PetscMUMPSInt *value,PetscBool *set,PetscMUMPSInt lb,PetscMUMPSInt ub) 80 { 81 PetscInt myval; 82 PetscBool myset; 83 PetscFunctionBegin; 84 /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */ 85 PetscCall(PetscOptionsInt_Private(PetscOptionsObject,opt,text,man,(PetscInt)currentvalue,&myval,&myset,lb,ub)); 86 if (myset) PetscCall(PetscMUMPSIntCast(myval,value)); 87 if (set) *set = myset; 88 PetscFunctionReturn(0); 89 } 90 #define PetscOptionsMUMPSInt(a,b,c,d,e,f) PetscOptionsMUMPSInt_Private(PetscOptionsObject,a,b,c,d,e,f,PETSC_MUMPS_INT_MIN,PETSC_MUMPS_INT_MAX) 91 92 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */ 93 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 94 #define PetscMUMPS_c(mumps) \ 95 do { \ 96 if (mumps->use_petsc_omp_support) { \ 97 if (mumps->is_omp_master) { \ 98 PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \ 99 MUMPS_c(&mumps->id); \ 100 PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \ 101 } \ 102 PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \ 103 /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \ 104 to processes, so we only Bcast info[1], an error code and leave others (since they do not have \ 105 an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \ 106 omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \ 107 */ \ 108 PetscCallMPI(MPI_Bcast(mumps->id.infog, 40,MPIU_MUMPSINT, 0,mumps->omp_comm));\ 109 PetscCallMPI(MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL, 0,mumps->omp_comm));\ 110 PetscCallMPI(MPI_Bcast(mumps->id.info, 1, MPIU_MUMPSINT, 0,mumps->omp_comm));\ 111 } else { \ 112 MUMPS_c(&mumps->id); \ 113 } \ 114 } while (0) 115 #else 116 #define PetscMUMPS_c(mumps) \ 117 do { MUMPS_c(&mumps->id); } while (0) 118 #endif 119 120 /* declare MumpsScalar */ 121 #if defined(PETSC_USE_COMPLEX) 122 #if defined(PETSC_USE_REAL_SINGLE) 123 #define MumpsScalar mumps_complex 124 #else 125 #define MumpsScalar mumps_double_complex 126 #endif 127 #else 128 #define MumpsScalar PetscScalar 129 #endif 130 131 /* macros s.t. indices match MUMPS documentation */ 132 #define ICNTL(I) icntl[(I)-1] 133 #define CNTL(I) cntl[(I)-1] 134 #define INFOG(I) infog[(I)-1] 135 #define INFO(I) info[(I)-1] 136 #define RINFOG(I) rinfog[(I)-1] 137 #define RINFO(I) rinfo[(I)-1] 138 139 typedef struct Mat_MUMPS Mat_MUMPS; 140 struct Mat_MUMPS { 141 #if defined(PETSC_USE_COMPLEX) 142 #if defined(PETSC_USE_REAL_SINGLE) 143 CMUMPS_STRUC_C id; 144 #else 145 ZMUMPS_STRUC_C id; 146 #endif 147 #else 148 #if defined(PETSC_USE_REAL_SINGLE) 149 SMUMPS_STRUC_C id; 150 #else 151 DMUMPS_STRUC_C id; 152 #endif 153 #endif 154 155 MatStructure matstruc; 156 PetscMPIInt myid,petsc_size; 157 PetscMUMPSInt *irn,*jcn; /* the (i,j,v) triplets passed to mumps. */ 158 PetscScalar *val,*val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */ 159 PetscInt64 nnz; /* number of nonzeros. The type is called selective 64-bit in mumps */ 160 PetscMUMPSInt sym; 161 MPI_Comm mumps_comm; 162 PetscMUMPSInt *ICNTL_pre; 163 PetscReal *CNTL_pre; 164 PetscMUMPSInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 165 VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 166 PetscMUMPSInt ICNTL20; /* use centralized (0) or distributed (10) dense RHS */ 167 PetscMUMPSInt lrhs_loc,nloc_rhs,*irhs_loc; 168 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 169 PetscInt *rhs_nrow,max_nrhs; 170 PetscMPIInt *rhs_recvcounts,*rhs_disps; 171 PetscScalar *rhs_loc,*rhs_recvbuf; 172 #endif 173 Vec b_seq,x_seq; 174 PetscInt ninfo,*info; /* which INFO to display */ 175 PetscInt sizeredrhs; 176 PetscScalar *schur_sol; 177 PetscInt schur_sizesol; 178 PetscMUMPSInt *ia_alloc,*ja_alloc; /* work arrays used for the CSR struct for sparse rhs */ 179 PetscInt64 cur_ilen,cur_jlen; /* current len of ia_alloc[], ja_alloc[] */ 180 PetscErrorCode (*ConvertToTriples)(Mat,PetscInt,MatReuse,Mat_MUMPS*); 181 182 /* stuff used by petsc/mumps OpenMP support*/ 183 PetscBool use_petsc_omp_support; 184 PetscOmpCtrl omp_ctrl; /* an OpenMP controler that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 185 MPI_Comm petsc_comm,omp_comm; /* petsc_comm is petsc matrix's comm */ 186 PetscInt64 *recvcount; /* a collection of nnz on omp_master */ 187 PetscMPIInt tag,omp_comm_size; 188 PetscBool is_omp_master; /* is this rank the master of omp_comm */ 189 MPI_Request *reqs; 190 }; 191 192 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt. 193 Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices. 194 */ 195 static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps,PetscInt nrow,PetscInt *ia,PetscInt *ja,PetscMUMPSInt **ia_mumps,PetscMUMPSInt **ja_mumps,PetscMUMPSInt *nnz_mumps) 196 { 197 PetscInt nnz=ia[nrow]-1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */ 198 199 PetscFunctionBegin; 200 #if defined(PETSC_USE_64BIT_INDICES) 201 { 202 PetscInt i; 203 if (nrow+1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */ 204 PetscCall(PetscFree(mumps->ia_alloc)); 205 PetscCall(PetscMalloc1(nrow+1,&mumps->ia_alloc)); 206 mumps->cur_ilen = nrow+1; 207 } 208 if (nnz > mumps->cur_jlen) { 209 PetscCall(PetscFree(mumps->ja_alloc)); 210 PetscCall(PetscMalloc1(nnz,&mumps->ja_alloc)); 211 mumps->cur_jlen = nnz; 212 } 213 for (i=0; i<nrow+1; i++) PetscCall(PetscMUMPSIntCast(ia[i],&(mumps->ia_alloc[i]))); 214 for (i=0; i<nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i],&(mumps->ja_alloc[i]))); 215 *ia_mumps = mumps->ia_alloc; 216 *ja_mumps = mumps->ja_alloc; 217 } 218 #else 219 *ia_mumps = ia; 220 *ja_mumps = ja; 221 #endif 222 PetscCall(PetscMUMPSIntCast(nnz,nnz_mumps)); 223 PetscFunctionReturn(0); 224 } 225 226 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps) 227 { 228 PetscFunctionBegin; 229 PetscCall(PetscFree(mumps->id.listvar_schur)); 230 PetscCall(PetscFree(mumps->id.redrhs)); 231 PetscCall(PetscFree(mumps->schur_sol)); 232 mumps->id.size_schur = 0; 233 mumps->id.schur_lld = 0; 234 mumps->id.ICNTL(19) = 0; 235 PetscFunctionReturn(0); 236 } 237 238 /* solve with rhs in mumps->id.redrhs and return in the same location */ 239 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 240 { 241 Mat_MUMPS *mumps=(Mat_MUMPS*)F->data; 242 Mat S,B,X; 243 MatFactorSchurStatus schurstatus; 244 PetscInt sizesol; 245 246 PetscFunctionBegin; 247 PetscCall(MatFactorFactorizeSchurComplement(F)); 248 PetscCall(MatFactorGetSchurComplement(F,&S,&schurstatus)); 249 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B)); 250 PetscCall(MatSetType(B,((PetscObject)S)->type_name)); 251 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 252 PetscCall(MatBindToCPU(B,S->boundtocpu)); 253 #endif 254 switch (schurstatus) { 255 case MAT_FACTOR_SCHUR_FACTORED: 256 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X)); 257 PetscCall(MatSetType(X,((PetscObject)S)->type_name)); 258 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 259 PetscCall(MatBindToCPU(X,S->boundtocpu)); 260 #endif 261 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 262 PetscCall(MatMatSolveTranspose(S,B,X)); 263 } else { 264 PetscCall(MatMatSolve(S,B,X)); 265 } 266 break; 267 case MAT_FACTOR_SCHUR_INVERTED: 268 sizesol = mumps->id.nrhs*mumps->id.size_schur; 269 if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 270 PetscCall(PetscFree(mumps->schur_sol)); 271 PetscCall(PetscMalloc1(sizesol,&mumps->schur_sol)); 272 mumps->schur_sizesol = sizesol; 273 } 274 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X)); 275 PetscCall(MatSetType(X,((PetscObject)S)->type_name)); 276 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 277 PetscCall(MatBindToCPU(X,S->boundtocpu)); 278 #endif 279 PetscCall(MatProductCreateWithMat(S,B,NULL,X)); 280 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 281 PetscCall(MatProductSetType(X,MATPRODUCT_AtB)); 282 } else { 283 PetscCall(MatProductSetType(X,MATPRODUCT_AB)); 284 } 285 PetscCall(MatProductSetFromOptions(X)); 286 PetscCall(MatProductSymbolic(X)); 287 PetscCall(MatProductNumeric(X)); 288 289 PetscCall(MatCopy(X,B,SAME_NONZERO_PATTERN)); 290 break; 291 default: 292 SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %d",F->schur_status); 293 } 294 PetscCall(MatFactorRestoreSchurComplement(F,&S,schurstatus)); 295 PetscCall(MatDestroy(&B)); 296 PetscCall(MatDestroy(&X)); 297 PetscFunctionReturn(0); 298 } 299 300 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 301 { 302 Mat_MUMPS *mumps=(Mat_MUMPS*)F->data; 303 304 PetscFunctionBegin; 305 if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 306 PetscFunctionReturn(0); 307 } 308 if (!expansion) { /* prepare for the condensation step */ 309 PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur; 310 /* allocate MUMPS internal array to store reduced right-hand sides */ 311 if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 312 PetscCall(PetscFree(mumps->id.redrhs)); 313 mumps->id.lredrhs = mumps->id.size_schur; 314 PetscCall(PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs)); 315 mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs; 316 } 317 mumps->id.ICNTL(26) = 1; /* condensation phase */ 318 } else { /* prepare for the expansion step */ 319 /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */ 320 PetscCall(MatMumpsSolveSchur_Private(F)); 321 mumps->id.ICNTL(26) = 2; /* expansion phase */ 322 PetscMUMPS_c(mumps); 323 PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1)); 324 /* restore defaults */ 325 mumps->id.ICNTL(26) = -1; 326 /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 327 if (mumps->id.nrhs > 1) { 328 PetscCall(PetscFree(mumps->id.redrhs)); 329 mumps->id.lredrhs = 0; 330 mumps->sizeredrhs = 0; 331 } 332 } 333 PetscFunctionReturn(0); 334 } 335 336 /* 337 MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz] 338 339 input: 340 A - matrix in aij,baij or sbaij format 341 shift - 0: C style output triple; 1: Fortran style output triple. 342 reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 343 MAT_REUSE_MATRIX: only the values in v array are updated 344 output: 345 nnz - dim of r, c, and v (number of local nonzero entries of A) 346 r, c, v - row and col index, matrix values (matrix triples) 347 348 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 349 freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 350 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 351 352 */ 353 354 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 355 { 356 const PetscScalar *av; 357 const PetscInt *ai,*aj,*ajj,M=A->rmap->n; 358 PetscInt64 nz,rnz,i,j,k; 359 PetscMUMPSInt *row,*col; 360 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 361 362 PetscFunctionBegin; 363 PetscCall(MatSeqAIJGetArrayRead(A,&av)); 364 mumps->val = (PetscScalar*)av; 365 if (reuse == MAT_INITIAL_MATRIX) { 366 nz = aa->nz; 367 ai = aa->i; 368 aj = aa->j; 369 PetscCall(PetscMalloc2(nz,&row,nz,&col)); 370 for (i=k=0; i<M; i++) { 371 rnz = ai[i+1] - ai[i]; 372 ajj = aj + ai[i]; 373 for (j=0; j<rnz; j++) { 374 PetscCall(PetscMUMPSIntCast(i+shift,&row[k])); 375 PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[k])); 376 k++; 377 } 378 } 379 mumps->irn = row; 380 mumps->jcn = col; 381 mumps->nnz = nz; 382 } 383 PetscCall(MatSeqAIJRestoreArrayRead(A,&av)); 384 PetscFunctionReturn(0); 385 } 386 387 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 388 { 389 PetscInt64 nz,i,j,k,r; 390 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 391 PetscMUMPSInt *row,*col; 392 393 PetscFunctionBegin; 394 mumps->val = a->val; 395 if (reuse == MAT_INITIAL_MATRIX) { 396 nz = a->sliidx[a->totalslices]; 397 PetscCall(PetscMalloc2(nz,&row,nz,&col)); 398 for (i=k=0; i<a->totalslices; i++) { 399 for (j=a->sliidx[i],r=0; j<a->sliidx[i+1]; j++,r=((r+1)&0x07)) { 400 PetscCall(PetscMUMPSIntCast(8*i+r+shift,&row[k++])); 401 } 402 } 403 for (i=0;i<nz;i++) PetscCall(PetscMUMPSIntCast(a->colidx[i]+shift,&col[i])); 404 mumps->irn = row; 405 mumps->jcn = col; 406 mumps->nnz = nz; 407 } 408 PetscFunctionReturn(0); 409 } 410 411 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 412 { 413 Mat_SeqBAIJ *aa=(Mat_SeqBAIJ*)A->data; 414 const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2; 415 PetscInt64 M,nz,idx=0,rnz,i,j,k,m; 416 PetscInt bs; 417 PetscMUMPSInt *row,*col; 418 419 PetscFunctionBegin; 420 PetscCall(MatGetBlockSize(A,&bs)); 421 M = A->rmap->N/bs; 422 mumps->val = aa->a; 423 if (reuse == MAT_INITIAL_MATRIX) { 424 ai = aa->i; aj = aa->j; 425 nz = bs2*aa->nz; 426 PetscCall(PetscMalloc2(nz,&row,nz,&col)); 427 for (i=0; i<M; i++) { 428 ajj = aj + ai[i]; 429 rnz = ai[i+1] - ai[i]; 430 for (k=0; k<rnz; k++) { 431 for (j=0; j<bs; j++) { 432 for (m=0; m<bs; m++) { 433 PetscCall(PetscMUMPSIntCast(i*bs + m + shift,&row[idx])); 434 PetscCall(PetscMUMPSIntCast(bs*ajj[k] + j + shift,&col[idx])); 435 idx++; 436 } 437 } 438 } 439 } 440 mumps->irn = row; 441 mumps->jcn = col; 442 mumps->nnz = nz; 443 } 444 PetscFunctionReturn(0); 445 } 446 447 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 448 { 449 const PetscInt *ai, *aj,*ajj; 450 PetscInt bs; 451 PetscInt64 nz,rnz,i,j,k,m; 452 PetscMUMPSInt *row,*col; 453 PetscScalar *val; 454 Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)A->data; 455 const PetscInt bs2=aa->bs2,mbs=aa->mbs; 456 #if defined(PETSC_USE_COMPLEX) 457 PetscBool isset,hermitian; 458 #endif 459 460 PetscFunctionBegin; 461 #if defined(PETSC_USE_COMPLEX) 462 PetscCall(MatIsHermitianKnown(A,&isset,&hermitian)); 463 PetscCheck(!isset || !hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 464 #endif 465 ai = aa->i; 466 aj = aa->j; 467 PetscCall(MatGetBlockSize(A,&bs)); 468 if (reuse == MAT_INITIAL_MATRIX) { 469 nz = aa->nz; 470 PetscCall(PetscMalloc2(bs2*nz,&row,bs2*nz,&col)); 471 if (bs>1) { 472 PetscCall(PetscMalloc1(bs2*nz,&mumps->val_alloc)); 473 mumps->val = mumps->val_alloc; 474 } else { 475 mumps->val = aa->a; 476 } 477 mumps->irn = row; 478 mumps->jcn = col; 479 } else { 480 if (bs == 1) mumps->val = aa->a; 481 row = mumps->irn; 482 col = mumps->jcn; 483 } 484 val = mumps->val; 485 486 nz = 0; 487 if (bs>1) { 488 for (i=0; i<mbs; i++) { 489 rnz = ai[i+1] - ai[i]; 490 ajj = aj + ai[i]; 491 for (j=0; j<rnz; j++) { 492 for (k=0; k<bs; k++) { 493 for (m=0; m<bs; m++) { 494 if (ajj[j]>i || k>=m) { 495 if (reuse == MAT_INITIAL_MATRIX) { 496 PetscCall(PetscMUMPSIntCast(i*bs + m + shift,&row[nz])); 497 PetscCall(PetscMUMPSIntCast(ajj[j]*bs + k + shift,&col[nz])); 498 } 499 val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs]; 500 } 501 } 502 } 503 } 504 } 505 } else if (reuse == MAT_INITIAL_MATRIX) { 506 for (i=0; i<mbs; i++) { 507 rnz = ai[i+1] - ai[i]; 508 ajj = aj + ai[i]; 509 for (j=0; j<rnz; j++) { 510 PetscCall(PetscMUMPSIntCast(i+shift,&row[nz])); 511 PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[nz])); 512 nz++; 513 } 514 } 515 PetscCheck(nz == aa->nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT,nz,aa->nz); 516 } 517 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 518 PetscFunctionReturn(0); 519 } 520 521 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 522 { 523 const PetscInt *ai,*aj,*ajj,*adiag,M=A->rmap->n; 524 PetscInt64 nz,rnz,i,j; 525 const PetscScalar *av,*v1; 526 PetscScalar *val; 527 PetscMUMPSInt *row,*col; 528 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 529 PetscBool missing; 530 #if defined(PETSC_USE_COMPLEX) 531 PetscBool hermitian,isset; 532 #endif 533 534 PetscFunctionBegin; 535 #if defined(PETSC_USE_COMPLEX) 536 PetscCall(MatIsHermitianKnown(A,&isset,&hermitian)); 537 PetscCheck(!isset || !hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 538 #endif 539 PetscCall(MatSeqAIJGetArrayRead(A,&av)); 540 ai = aa->i; aj = aa->j; 541 adiag = aa->diag; 542 PetscCall(MatMissingDiagonal_SeqAIJ(A,&missing,NULL)); 543 if (reuse == MAT_INITIAL_MATRIX) { 544 /* count nz in the upper triangular part of A */ 545 nz = 0; 546 if (missing) { 547 for (i=0; i<M; i++) { 548 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 549 for (j=ai[i];j<ai[i+1];j++) { 550 if (aj[j] < i) continue; 551 nz++; 552 } 553 } else { 554 nz += ai[i+1] - adiag[i]; 555 } 556 } 557 } else { 558 for (i=0; i<M; i++) nz += ai[i+1] - adiag[i]; 559 } 560 PetscCall(PetscMalloc2(nz,&row,nz,&col)); 561 PetscCall(PetscMalloc1(nz,&val)); 562 mumps->nnz = nz; 563 mumps->irn = row; 564 mumps->jcn = col; 565 mumps->val = mumps->val_alloc = val; 566 567 nz = 0; 568 if (missing) { 569 for (i=0; i<M; i++) { 570 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 571 for (j=ai[i];j<ai[i+1];j++) { 572 if (aj[j] < i) continue; 573 PetscCall(PetscMUMPSIntCast(i+shift,&row[nz])); 574 PetscCall(PetscMUMPSIntCast(aj[j]+shift,&col[nz])); 575 val[nz] = av[j]; 576 nz++; 577 } 578 } else { 579 rnz = ai[i+1] - adiag[i]; 580 ajj = aj + adiag[i]; 581 v1 = av + adiag[i]; 582 for (j=0; j<rnz; j++) { 583 PetscCall(PetscMUMPSIntCast(i+shift,&row[nz])); 584 PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[nz])); 585 val[nz++] = v1[j]; 586 } 587 } 588 } 589 } else { 590 for (i=0; i<M; i++) { 591 rnz = ai[i+1] - adiag[i]; 592 ajj = aj + adiag[i]; 593 v1 = av + adiag[i]; 594 for (j=0; j<rnz; j++) { 595 PetscCall(PetscMUMPSIntCast(i+shift,&row[nz])); 596 PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[nz])); 597 val[nz++] = v1[j]; 598 } 599 } 600 } 601 } else { 602 nz = 0; 603 val = mumps->val; 604 if (missing) { 605 for (i=0; i <M; i++) { 606 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 607 for (j=ai[i];j<ai[i+1];j++) { 608 if (aj[j] < i) continue; 609 val[nz++] = av[j]; 610 } 611 } else { 612 rnz = ai[i+1] - adiag[i]; 613 v1 = av + adiag[i]; 614 for (j=0; j<rnz; j++) { 615 val[nz++] = v1[j]; 616 } 617 } 618 } 619 } else { 620 for (i=0; i <M; i++) { 621 rnz = ai[i+1] - adiag[i]; 622 v1 = av + adiag[i]; 623 for (j=0; j<rnz; j++) { 624 val[nz++] = v1[j]; 625 } 626 } 627 } 628 } 629 PetscCall(MatSeqAIJRestoreArrayRead(A,&av)); 630 PetscFunctionReturn(0); 631 } 632 633 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 634 { 635 const PetscInt *ai,*aj,*bi,*bj,*garray,*ajj,*bjj; 636 PetscInt bs; 637 PetscInt64 rstart,nz,i,j,k,m,jj,irow,countA,countB; 638 PetscMUMPSInt *row,*col; 639 const PetscScalar *av,*bv,*v1,*v2; 640 PetscScalar *val; 641 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data; 642 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)(mat->A)->data; 643 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 644 const PetscInt bs2=aa->bs2,mbs=aa->mbs; 645 #if defined(PETSC_USE_COMPLEX) 646 PetscBool hermitian,isset; 647 #endif 648 649 PetscFunctionBegin; 650 #if defined(PETSC_USE_COMPLEX) 651 PetscCall(MatIsHermitianKnown(A,&isset,&hermitian)); 652 PetscCheck(!isset || !hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 653 #endif 654 PetscCall(MatGetBlockSize(A,&bs)); 655 rstart = A->rmap->rstart; 656 ai = aa->i; 657 aj = aa->j; 658 bi = bb->i; 659 bj = bb->j; 660 av = aa->a; 661 bv = bb->a; 662 663 garray = mat->garray; 664 665 if (reuse == MAT_INITIAL_MATRIX) { 666 nz = (aa->nz+bb->nz)*bs2; /* just a conservative estimate */ 667 PetscCall(PetscMalloc2(nz,&row,nz,&col)); 668 PetscCall(PetscMalloc1(nz,&val)); 669 /* can not decide the exact mumps->nnz now because of the SBAIJ */ 670 mumps->irn = row; 671 mumps->jcn = col; 672 mumps->val = mumps->val_alloc = val; 673 } else { 674 val = mumps->val; 675 } 676 677 jj = 0; irow = rstart; 678 for (i=0; i<mbs; i++) { 679 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 680 countA = ai[i+1] - ai[i]; 681 countB = bi[i+1] - bi[i]; 682 bjj = bj + bi[i]; 683 v1 = av + ai[i]*bs2; 684 v2 = bv + bi[i]*bs2; 685 686 if (bs>1) { 687 /* A-part */ 688 for (j=0; j<countA; j++) { 689 for (k=0; k<bs; k++) { 690 for (m=0; m<bs; m++) { 691 if (rstart + ajj[j]*bs>irow || k>=m) { 692 if (reuse == MAT_INITIAL_MATRIX) { 693 PetscCall(PetscMUMPSIntCast(irow + m + shift,&row[jj])); 694 PetscCall(PetscMUMPSIntCast(rstart + ajj[j]*bs + k + shift,&col[jj])); 695 } 696 val[jj++] = v1[j*bs2 + m + k*bs]; 697 } 698 } 699 } 700 } 701 702 /* B-part */ 703 for (j=0; j < countB; j++) { 704 for (k=0; k<bs; k++) { 705 for (m=0; m<bs; m++) { 706 if (reuse == MAT_INITIAL_MATRIX) { 707 PetscCall(PetscMUMPSIntCast(irow + m + shift,&row[jj])); 708 PetscCall(PetscMUMPSIntCast(garray[bjj[j]]*bs + k + shift,&col[jj])); 709 } 710 val[jj++] = v2[j*bs2 + m + k*bs]; 711 } 712 } 713 } 714 } else { 715 /* A-part */ 716 for (j=0; j<countA; j++) { 717 if (reuse == MAT_INITIAL_MATRIX) { 718 PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj])); 719 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj])); 720 } 721 val[jj++] = v1[j]; 722 } 723 724 /* B-part */ 725 for (j=0; j < countB; j++) { 726 if (reuse == MAT_INITIAL_MATRIX) { 727 PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj])); 728 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj])); 729 } 730 val[jj++] = v2[j]; 731 } 732 } 733 irow+=bs; 734 } 735 mumps->nnz = jj; 736 PetscFunctionReturn(0); 737 } 738 739 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 740 { 741 const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 742 PetscInt64 rstart,nz,i,j,jj,irow,countA,countB; 743 PetscMUMPSInt *row,*col; 744 const PetscScalar *av, *bv,*v1,*v2; 745 PetscScalar *val; 746 Mat Ad,Ao; 747 Mat_SeqAIJ *aa; 748 Mat_SeqAIJ *bb; 749 750 PetscFunctionBegin; 751 PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray)); 752 PetscCall(MatSeqAIJGetArrayRead(Ad,&av)); 753 PetscCall(MatSeqAIJGetArrayRead(Ao,&bv)); 754 755 aa = (Mat_SeqAIJ*)(Ad)->data; 756 bb = (Mat_SeqAIJ*)(Ao)->data; 757 ai = aa->i; 758 aj = aa->j; 759 bi = bb->i; 760 bj = bb->j; 761 762 rstart = A->rmap->rstart; 763 764 if (reuse == MAT_INITIAL_MATRIX) { 765 nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */ 766 PetscCall(PetscMalloc2(nz,&row,nz,&col)); 767 PetscCall(PetscMalloc1(nz,&val)); 768 mumps->nnz = nz; 769 mumps->irn = row; 770 mumps->jcn = col; 771 mumps->val = mumps->val_alloc = val; 772 } else { 773 val = mumps->val; 774 } 775 776 jj = 0; irow = rstart; 777 for (i=0; i<m; i++) { 778 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 779 countA = ai[i+1] - ai[i]; 780 countB = bi[i+1] - bi[i]; 781 bjj = bj + bi[i]; 782 v1 = av + ai[i]; 783 v2 = bv + bi[i]; 784 785 /* A-part */ 786 for (j=0; j<countA; j++) { 787 if (reuse == MAT_INITIAL_MATRIX) { 788 PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj])); 789 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj])); 790 } 791 val[jj++] = v1[j]; 792 } 793 794 /* B-part */ 795 for (j=0; j < countB; j++) { 796 if (reuse == MAT_INITIAL_MATRIX) { 797 PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj])); 798 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj])); 799 } 800 val[jj++] = v2[j]; 801 } 802 irow++; 803 } 804 PetscCall(MatSeqAIJRestoreArrayRead(Ad,&av)); 805 PetscCall(MatSeqAIJRestoreArrayRead(Ao,&bv)); 806 PetscFunctionReturn(0); 807 } 808 809 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 810 { 811 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)A->data; 812 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ*)(mat->A)->data; 813 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 814 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj; 815 const PetscInt *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart; 816 const PetscInt bs2=mat->bs2; 817 PetscInt bs; 818 PetscInt64 nz,i,j,k,n,jj,irow,countA,countB,idx; 819 PetscMUMPSInt *row,*col; 820 const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2; 821 PetscScalar *val; 822 823 PetscFunctionBegin; 824 PetscCall(MatGetBlockSize(A,&bs)); 825 if (reuse == MAT_INITIAL_MATRIX) { 826 nz = bs2*(aa->nz + bb->nz); 827 PetscCall(PetscMalloc2(nz,&row,nz,&col)); 828 PetscCall(PetscMalloc1(nz,&val)); 829 mumps->nnz = nz; 830 mumps->irn = row; 831 mumps->jcn = col; 832 mumps->val = mumps->val_alloc = val; 833 } else { 834 val = mumps->val; 835 } 836 837 jj = 0; irow = rstart; 838 for (i=0; i<mbs; i++) { 839 countA = ai[i+1] - ai[i]; 840 countB = bi[i+1] - bi[i]; 841 ajj = aj + ai[i]; 842 bjj = bj + bi[i]; 843 v1 = av + bs2*ai[i]; 844 v2 = bv + bs2*bi[i]; 845 846 idx = 0; 847 /* A-part */ 848 for (k=0; k<countA; k++) { 849 for (j=0; j<bs; j++) { 850 for (n=0; n<bs; n++) { 851 if (reuse == MAT_INITIAL_MATRIX) { 852 PetscCall(PetscMUMPSIntCast(irow + n + shift,&row[jj])); 853 PetscCall(PetscMUMPSIntCast(rstart + bs*ajj[k] + j + shift,&col[jj])); 854 } 855 val[jj++] = v1[idx++]; 856 } 857 } 858 } 859 860 idx = 0; 861 /* B-part */ 862 for (k=0; k<countB; k++) { 863 for (j=0; j<bs; j++) { 864 for (n=0; n<bs; n++) { 865 if (reuse == MAT_INITIAL_MATRIX) { 866 PetscCall(PetscMUMPSIntCast(irow + n + shift,&row[jj])); 867 PetscCall(PetscMUMPSIntCast(bs*garray[bjj[k]] + j + shift,&col[jj])); 868 } 869 val[jj++] = v2[idx++]; 870 } 871 } 872 } 873 irow += bs; 874 } 875 PetscFunctionReturn(0); 876 } 877 878 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 879 { 880 const PetscInt *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 881 PetscInt64 rstart,nz,nza,nzb,i,j,jj,irow,countA,countB; 882 PetscMUMPSInt *row,*col; 883 const PetscScalar *av, *bv,*v1,*v2; 884 PetscScalar *val; 885 Mat Ad,Ao; 886 Mat_SeqAIJ *aa; 887 Mat_SeqAIJ *bb; 888 #if defined(PETSC_USE_COMPLEX) 889 PetscBool hermitian,isset; 890 #endif 891 892 PetscFunctionBegin; 893 #if defined(PETSC_USE_COMPLEX) 894 PetscCall(MatIsHermitianKnown(A,&isset,&hermitian)); 895 PetscCheck(!isset || !hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 896 #endif 897 PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray)); 898 PetscCall(MatSeqAIJGetArrayRead(Ad,&av)); 899 PetscCall(MatSeqAIJGetArrayRead(Ao,&bv)); 900 901 aa = (Mat_SeqAIJ*)(Ad)->data; 902 bb = (Mat_SeqAIJ*)(Ao)->data; 903 ai = aa->i; 904 aj = aa->j; 905 adiag = aa->diag; 906 bi = bb->i; 907 bj = bb->j; 908 909 rstart = A->rmap->rstart; 910 911 if (reuse == MAT_INITIAL_MATRIX) { 912 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 913 nzb = 0; /* num of upper triangular entries in mat->B */ 914 for (i=0; i<m; i++) { 915 nza += (ai[i+1] - adiag[i]); 916 countB = bi[i+1] - bi[i]; 917 bjj = bj + bi[i]; 918 for (j=0; j<countB; j++) { 919 if (garray[bjj[j]] > rstart) nzb++; 920 } 921 } 922 923 nz = nza + nzb; /* total nz of upper triangular part of mat */ 924 PetscCall(PetscMalloc2(nz,&row,nz,&col)); 925 PetscCall(PetscMalloc1(nz,&val)); 926 mumps->nnz = nz; 927 mumps->irn = row; 928 mumps->jcn = col; 929 mumps->val = mumps->val_alloc = val; 930 } else { 931 val = mumps->val; 932 } 933 934 jj = 0; irow = rstart; 935 for (i=0; i<m; i++) { 936 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 937 v1 = av + adiag[i]; 938 countA = ai[i+1] - adiag[i]; 939 countB = bi[i+1] - bi[i]; 940 bjj = bj + bi[i]; 941 v2 = bv + bi[i]; 942 943 /* A-part */ 944 for (j=0; j<countA; j++) { 945 if (reuse == MAT_INITIAL_MATRIX) { 946 PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj])); 947 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj])); 948 } 949 val[jj++] = v1[j]; 950 } 951 952 /* B-part */ 953 for (j=0; j < countB; j++) { 954 if (garray[bjj[j]] > rstart) { 955 if (reuse == MAT_INITIAL_MATRIX) { 956 PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj])); 957 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj])); 958 } 959 val[jj++] = v2[j]; 960 } 961 } 962 irow++; 963 } 964 PetscCall(MatSeqAIJRestoreArrayRead(Ad,&av)); 965 PetscCall(MatSeqAIJRestoreArrayRead(Ao,&bv)); 966 PetscFunctionReturn(0); 967 } 968 969 PetscErrorCode MatDestroy_MUMPS(Mat A) 970 { 971 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 972 973 PetscFunctionBegin; 974 PetscCall(PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc)); 975 PetscCall(VecScatterDestroy(&mumps->scat_rhs)); 976 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 977 PetscCall(VecDestroy(&mumps->b_seq)); 978 PetscCall(VecDestroy(&mumps->x_seq)); 979 PetscCall(PetscFree(mumps->id.perm_in)); 980 PetscCall(PetscFree2(mumps->irn,mumps->jcn)); 981 PetscCall(PetscFree(mumps->val_alloc)); 982 PetscCall(PetscFree(mumps->info)); 983 PetscCall(PetscFree(mumps->ICNTL_pre)); 984 PetscCall(PetscFree(mumps->CNTL_pre)); 985 PetscCall(MatMumpsResetSchur_Private(mumps)); 986 if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */ 987 mumps->id.job = JOB_END; 988 PetscMUMPS_c(mumps); 989 PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d",mumps->id.INFOG(1)); 990 if (mumps->mumps_comm != MPI_COMM_NULL) { 991 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm)); 992 else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A),&mumps->mumps_comm)); 993 } 994 } 995 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 996 if (mumps->use_petsc_omp_support) { 997 PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl)); 998 PetscCall(PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf)); 999 PetscCall(PetscFree3(mumps->rhs_nrow,mumps->rhs_recvcounts,mumps->rhs_disps)); 1000 } 1001 #endif 1002 PetscCall(PetscFree(mumps->ia_alloc)); 1003 PetscCall(PetscFree(mumps->ja_alloc)); 1004 PetscCall(PetscFree(mumps->recvcount)); 1005 PetscCall(PetscFree(mumps->reqs)); 1006 PetscCall(PetscFree(mumps->irhs_loc)); 1007 PetscCall(PetscFree(A->data)); 1008 1009 /* clear composed functions */ 1010 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL)); 1011 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL)); 1012 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL)); 1013 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL)); 1014 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL)); 1015 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL)); 1016 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL)); 1017 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL)); 1018 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL)); 1019 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL)); 1020 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL)); 1021 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL)); 1022 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL)); 1023 PetscFunctionReturn(0); 1024 } 1025 1026 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */ 1027 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A,PetscInt nrhs,const PetscScalar *array) 1028 { 1029 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1030 const PetscMPIInt ompsize=mumps->omp_comm_size; 1031 PetscInt i,m,M,rstart; 1032 1033 PetscFunctionBegin; 1034 PetscCall(MatGetSize(A,&M,NULL)); 1035 PetscCall(MatGetLocalSize(A,&m,NULL)); 1036 PetscCheck(M <= PETSC_MUMPS_INT_MAX,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt"); 1037 if (ompsize == 1) { 1038 if (!mumps->irhs_loc) { 1039 mumps->nloc_rhs = m; 1040 PetscCall(PetscMalloc1(m,&mumps->irhs_loc)); 1041 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 1042 for (i=0; i<m; i++) mumps->irhs_loc[i] = rstart+i+1; /* use 1-based indices */ 1043 } 1044 mumps->id.rhs_loc = (MumpsScalar*)array; 1045 } else { 1046 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1047 const PetscInt *ranges; 1048 PetscMPIInt j,k,sendcount,*petsc_ranks,*omp_ranks; 1049 MPI_Group petsc_group,omp_group; 1050 PetscScalar *recvbuf=NULL; 1051 1052 if (mumps->is_omp_master) { 1053 /* Lazily initialize the omp stuff for distributed rhs */ 1054 if (!mumps->irhs_loc) { 1055 PetscCall(PetscMalloc2(ompsize,&omp_ranks,ompsize,&petsc_ranks)); 1056 PetscCall(PetscMalloc3(ompsize,&mumps->rhs_nrow,ompsize,&mumps->rhs_recvcounts,ompsize,&mumps->rhs_disps)); 1057 PetscCallMPI(MPI_Comm_group(mumps->petsc_comm,&petsc_group)); 1058 PetscCallMPI(MPI_Comm_group(mumps->omp_comm,&omp_group)); 1059 for (j=0; j<ompsize; j++) omp_ranks[j] = j; 1060 PetscCallMPI(MPI_Group_translate_ranks(omp_group,ompsize,omp_ranks,petsc_group,petsc_ranks)); 1061 1062 /* Populate mumps->irhs_loc[], rhs_nrow[] */ 1063 mumps->nloc_rhs = 0; 1064 PetscCall(MatGetOwnershipRanges(A,&ranges)); 1065 for (j=0; j<ompsize; j++) { 1066 mumps->rhs_nrow[j] = ranges[petsc_ranks[j]+1] - ranges[petsc_ranks[j]]; 1067 mumps->nloc_rhs += mumps->rhs_nrow[j]; 1068 } 1069 PetscCall(PetscMalloc1(mumps->nloc_rhs,&mumps->irhs_loc)); 1070 for (j=k=0; j<ompsize; j++) { 1071 for (i=ranges[petsc_ranks[j]]; i<ranges[petsc_ranks[j]+1]; i++,k++) mumps->irhs_loc[k] = i+1; /* uses 1-based indices */ 1072 } 1073 1074 PetscCall(PetscFree2(omp_ranks,petsc_ranks)); 1075 PetscCallMPI(MPI_Group_free(&petsc_group)); 1076 PetscCallMPI(MPI_Group_free(&omp_group)); 1077 } 1078 1079 /* Realloc buffers when current nrhs is bigger than what we have met */ 1080 if (nrhs > mumps->max_nrhs) { 1081 PetscCall(PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf)); 1082 PetscCall(PetscMalloc2(mumps->nloc_rhs*nrhs,&mumps->rhs_loc,mumps->nloc_rhs*nrhs,&mumps->rhs_recvbuf)); 1083 mumps->max_nrhs = nrhs; 1084 } 1085 1086 /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */ 1087 for (j=0; j<ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j]*nrhs,&mumps->rhs_recvcounts[j])); 1088 mumps->rhs_disps[0] = 0; 1089 for (j=1; j<ompsize; j++) { 1090 mumps->rhs_disps[j] = mumps->rhs_disps[j-1] + mumps->rhs_recvcounts[j-1]; 1091 PetscCheck(mumps->rhs_disps[j] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscMPIInt overflow!"); 1092 } 1093 recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */ 1094 } 1095 1096 PetscCall(PetscMPIIntCast(m*nrhs,&sendcount)); 1097 PetscCallMPI(MPI_Gatherv(array,sendcount,MPIU_SCALAR,recvbuf,mumps->rhs_recvcounts,mumps->rhs_disps,MPIU_SCALAR,0,mumps->omp_comm)); 1098 1099 if (mumps->is_omp_master) { 1100 if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */ 1101 PetscScalar *dst,*dstbase = mumps->rhs_loc; 1102 for (j=0; j<ompsize; j++) { 1103 const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j]; 1104 dst = dstbase; 1105 for (i=0; i<nrhs; i++) { 1106 PetscCall(PetscArraycpy(dst,src,mumps->rhs_nrow[j])); 1107 src += mumps->rhs_nrow[j]; 1108 dst += mumps->nloc_rhs; 1109 } 1110 dstbase += mumps->rhs_nrow[j]; 1111 } 1112 } 1113 mumps->id.rhs_loc = (MumpsScalar*)mumps->rhs_loc; 1114 } 1115 #endif /* PETSC_HAVE_OPENMP_SUPPORT */ 1116 } 1117 mumps->id.nrhs = nrhs; 1118 mumps->id.nloc_rhs = mumps->nloc_rhs; 1119 mumps->id.lrhs_loc = mumps->nloc_rhs; 1120 mumps->id.irhs_loc = mumps->irhs_loc; 1121 PetscFunctionReturn(0); 1122 } 1123 1124 PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x) 1125 { 1126 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1127 const PetscScalar *rarray = NULL; 1128 PetscScalar *array; 1129 IS is_iden,is_petsc; 1130 PetscInt i; 1131 PetscBool second_solve = PETSC_FALSE; 1132 static PetscBool cite1 = PETSC_FALSE,cite2 = PETSC_FALSE; 1133 1134 PetscFunctionBegin; 1135 PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1)); 1136 PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",&cite2)); 1137 1138 if (A->factorerrortype) { 1139 PetscCall(PetscInfo(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2))); 1140 PetscCall(VecSetInf(x)); 1141 PetscFunctionReturn(0); 1142 } 1143 1144 mumps->id.nrhs = 1; 1145 if (mumps->petsc_size > 1) { 1146 if (mumps->ICNTL20 == 10) { 1147 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1148 PetscCall(VecGetArrayRead(b,&rarray)); 1149 PetscCall(MatMumpsSetUpDistRHSInfo(A,1,rarray)); 1150 } else { 1151 mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/ 1152 PetscCall(VecScatterBegin(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD)); 1153 PetscCall(VecScatterEnd(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD)); 1154 if (!mumps->myid) { 1155 PetscCall(VecGetArray(mumps->b_seq,&array)); 1156 mumps->id.rhs = (MumpsScalar*)array; 1157 } 1158 } 1159 } else { /* petsc_size == 1 */ 1160 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1161 PetscCall(VecCopy(b,x)); 1162 PetscCall(VecGetArray(x,&array)); 1163 mumps->id.rhs = (MumpsScalar*)array; 1164 } 1165 1166 /* 1167 handle condensation step of Schur complement (if any) 1168 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 1169 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 1170 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 1171 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 1172 */ 1173 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1174 PetscCheck(mumps->petsc_size <= 1,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc"); 1175 second_solve = PETSC_TRUE; 1176 PetscCall(MatMumpsHandleSchur_Private(A,PETSC_FALSE)); 1177 } 1178 /* solve phase */ 1179 /*-------------*/ 1180 mumps->id.job = JOB_SOLVE; 1181 PetscMUMPS_c(mumps); 1182 PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1)); 1183 1184 /* handle expansion step of Schur complement (if any) */ 1185 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A,PETSC_TRUE)); 1186 1187 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 1188 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 1189 /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 1190 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1191 } 1192 if (!mumps->scat_sol) { /* create scatter scat_sol */ 1193 PetscInt *isol2_loc=NULL; 1194 PetscCall(ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden)); /* from */ 1195 PetscCall(PetscMalloc1(mumps->id.lsol_loc,&isol2_loc)); 1196 for (i=0; i<mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i]-1; /* change Fortran style to C style */ 1197 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,isol2_loc,PETSC_OWN_POINTER,&is_petsc)); /* to */ 1198 PetscCall(VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol)); 1199 PetscCall(ISDestroy(&is_iden)); 1200 PetscCall(ISDestroy(&is_petsc)); 1201 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 1202 } 1203 1204 PetscCall(VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD)); 1205 PetscCall(VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD)); 1206 } 1207 1208 if (mumps->petsc_size > 1) { 1209 if (mumps->ICNTL20 == 10) { 1210 PetscCall(VecRestoreArrayRead(b,&rarray)); 1211 } else if (!mumps->myid) { 1212 PetscCall(VecRestoreArray(mumps->b_seq,&array)); 1213 } 1214 } else PetscCall(VecRestoreArray(x,&array)); 1215 1216 PetscCall(PetscLogFlops(2.0*mumps->id.RINFO(3))); 1217 PetscFunctionReturn(0); 1218 } 1219 1220 PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x) 1221 { 1222 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1223 1224 PetscFunctionBegin; 1225 mumps->id.ICNTL(9) = 0; 1226 PetscCall(MatSolve_MUMPS(A,b,x)); 1227 mumps->id.ICNTL(9) = 1; 1228 PetscFunctionReturn(0); 1229 } 1230 1231 PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X) 1232 { 1233 Mat Bt = NULL; 1234 PetscBool denseX,denseB,flg,flgT; 1235 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1236 PetscInt i,nrhs,M; 1237 PetscScalar *array; 1238 const PetscScalar *rbray; 1239 PetscInt lsol_loc,nlsol_loc,*idxx,iidx = 0; 1240 PetscMUMPSInt *isol_loc,*isol_loc_save; 1241 PetscScalar *bray,*sol_loc,*sol_loc_save; 1242 IS is_to,is_from; 1243 PetscInt k,proc,j,m,myrstart; 1244 const PetscInt *rstart; 1245 Vec v_mpi,msol_loc; 1246 VecScatter scat_sol; 1247 Vec b_seq; 1248 VecScatter scat_rhs; 1249 PetscScalar *aa; 1250 PetscInt spnr,*ia,*ja; 1251 Mat_MPIAIJ *b = NULL; 1252 1253 PetscFunctionBegin; 1254 PetscCall(PetscObjectTypeCompareAny((PetscObject)X,&denseX,MATSEQDENSE,MATMPIDENSE,NULL)); 1255 PetscCheck(denseX,PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix"); 1256 1257 PetscCall(PetscObjectTypeCompareAny((PetscObject)B,&denseB,MATSEQDENSE,MATMPIDENSE,NULL)); 1258 if (denseB) { 1259 PetscCheck(B->rmap->n == X->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution"); 1260 mumps->id.ICNTL(20)= 0; /* dense RHS */ 1261 } else { /* sparse B */ 1262 PetscCheck(X != B,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_IDN,"X and B must be different matrices"); 1263 PetscCall(PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT)); 1264 if (flgT) { /* input B is transpose of actural RHS matrix, 1265 because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */ 1266 PetscCall(MatTransposeGetMat(B,&Bt)); 1267 } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix"); 1268 mumps->id.ICNTL(20)= 1; /* sparse RHS */ 1269 } 1270 1271 PetscCall(MatGetSize(B,&M,&nrhs)); 1272 mumps->id.nrhs = nrhs; 1273 mumps->id.lrhs = M; 1274 mumps->id.rhs = NULL; 1275 1276 if (mumps->petsc_size == 1) { 1277 PetscScalar *aa; 1278 PetscInt spnr,*ia,*ja; 1279 PetscBool second_solve = PETSC_FALSE; 1280 1281 PetscCall(MatDenseGetArray(X,&array)); 1282 mumps->id.rhs = (MumpsScalar*)array; 1283 1284 if (denseB) { 1285 /* copy B to X */ 1286 PetscCall(MatDenseGetArrayRead(B,&rbray)); 1287 PetscCall(PetscArraycpy(array,rbray,M*nrhs)); 1288 PetscCall(MatDenseRestoreArrayRead(B,&rbray)); 1289 } else { /* sparse B */ 1290 PetscCall(MatSeqAIJGetArray(Bt,&aa)); 1291 PetscCall(MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg)); 1292 PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 1293 PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs)); 1294 mumps->id.rhs_sparse = (MumpsScalar*)aa; 1295 } 1296 /* handle condensation step of Schur complement (if any) */ 1297 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1298 second_solve = PETSC_TRUE; 1299 PetscCall(MatMumpsHandleSchur_Private(A,PETSC_FALSE)); 1300 } 1301 /* solve phase */ 1302 /*-------------*/ 1303 mumps->id.job = JOB_SOLVE; 1304 PetscMUMPS_c(mumps); 1305 PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1)); 1306 1307 /* handle expansion step of Schur complement (if any) */ 1308 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A,PETSC_TRUE)); 1309 if (!denseB) { /* sparse B */ 1310 PetscCall(MatSeqAIJRestoreArray(Bt,&aa)); 1311 PetscCall(MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg)); 1312 PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 1313 } 1314 PetscCall(MatDenseRestoreArray(X,&array)); 1315 PetscFunctionReturn(0); 1316 } 1317 1318 /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/ 1319 PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19),PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc"); 1320 1321 /* create msol_loc to hold mumps local solution */ 1322 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1323 sol_loc_save = (PetscScalar*)mumps->id.sol_loc; 1324 1325 lsol_loc = mumps->id.lsol_loc; 1326 nlsol_loc = nrhs*lsol_loc; /* length of sol_loc */ 1327 PetscCall(PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc)); 1328 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1329 mumps->id.isol_loc = isol_loc; 1330 1331 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc)); 1332 1333 if (denseB) { 1334 if (mumps->ICNTL20 == 10) { 1335 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1336 PetscCall(MatDenseGetArrayRead(B,&rbray)); 1337 PetscCall(MatMumpsSetUpDistRHSInfo(A,nrhs,rbray)); 1338 PetscCall(MatDenseRestoreArrayRead(B,&rbray)); 1339 PetscCall(MatGetLocalSize(B,&m,NULL)); 1340 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,NULL,&v_mpi)); 1341 } else { 1342 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1343 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1344 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1345 0, re-arrange B into desired order, which is a local operation. 1346 */ 1347 1348 /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 1349 /* wrap dense rhs matrix B into a vector v_mpi */ 1350 PetscCall(MatGetLocalSize(B,&m,NULL)); 1351 PetscCall(MatDenseGetArray(B,&bray)); 1352 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi)); 1353 PetscCall(MatDenseRestoreArray(B,&bray)); 1354 1355 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */ 1356 if (!mumps->myid) { 1357 PetscInt *idx; 1358 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1359 PetscCall(PetscMalloc1(nrhs*M,&idx)); 1360 PetscCall(MatGetOwnershipRanges(B,&rstart)); 1361 k = 0; 1362 for (proc=0; proc<mumps->petsc_size; proc++) { 1363 for (j=0; j<nrhs; j++) { 1364 for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i; 1365 } 1366 } 1367 1368 PetscCall(VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq)); 1369 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to)); 1370 PetscCall(ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from)); 1371 } else { 1372 PetscCall(VecCreateSeq(PETSC_COMM_SELF,0,&b_seq)); 1373 PetscCall(ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to)); 1374 PetscCall(ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from)); 1375 } 1376 PetscCall(VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs)); 1377 PetscCall(VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD)); 1378 PetscCall(ISDestroy(&is_to)); 1379 PetscCall(ISDestroy(&is_from)); 1380 PetscCall(VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD)); 1381 1382 if (!mumps->myid) { /* define rhs on the host */ 1383 PetscCall(VecGetArray(b_seq,&bray)); 1384 mumps->id.rhs = (MumpsScalar*)bray; 1385 PetscCall(VecRestoreArray(b_seq,&bray)); 1386 } 1387 } 1388 } else { /* sparse B */ 1389 b = (Mat_MPIAIJ*)Bt->data; 1390 1391 /* wrap dense X into a vector v_mpi */ 1392 PetscCall(MatGetLocalSize(X,&m,NULL)); 1393 PetscCall(MatDenseGetArray(X,&bray)); 1394 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi)); 1395 PetscCall(MatDenseRestoreArray(X,&bray)); 1396 1397 if (!mumps->myid) { 1398 PetscCall(MatSeqAIJGetArray(b->A,&aa)); 1399 PetscCall(MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg)); 1400 PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 1401 PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs)); 1402 mumps->id.rhs_sparse = (MumpsScalar*)aa; 1403 } else { 1404 mumps->id.irhs_ptr = NULL; 1405 mumps->id.irhs_sparse = NULL; 1406 mumps->id.nz_rhs = 0; 1407 mumps->id.rhs_sparse = NULL; 1408 } 1409 } 1410 1411 /* solve phase */ 1412 /*-------------*/ 1413 mumps->id.job = JOB_SOLVE; 1414 PetscMUMPS_c(mumps); 1415 PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1)); 1416 1417 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1418 PetscCall(MatDenseGetArray(X,&array)); 1419 PetscCall(VecPlaceArray(v_mpi,array)); 1420 1421 /* create scatter scat_sol */ 1422 PetscCall(MatGetOwnershipRanges(X,&rstart)); 1423 /* iidx: index for scatter mumps solution to petsc X */ 1424 1425 PetscCall(ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from)); 1426 PetscCall(PetscMalloc1(nlsol_loc,&idxx)); 1427 for (i=0; i<lsol_loc; i++) { 1428 isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1429 1430 for (proc=0; proc<mumps->petsc_size; proc++) { 1431 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) { 1432 myrstart = rstart[proc]; 1433 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1434 iidx = k + myrstart*nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1435 m = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */ 1436 break; 1437 } 1438 } 1439 1440 for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m; 1441 } 1442 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to)); 1443 PetscCall(VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol)); 1444 PetscCall(VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD)); 1445 PetscCall(ISDestroy(&is_from)); 1446 PetscCall(ISDestroy(&is_to)); 1447 PetscCall(VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD)); 1448 PetscCall(MatDenseRestoreArray(X,&array)); 1449 1450 /* free spaces */ 1451 mumps->id.sol_loc = (MumpsScalar*)sol_loc_save; 1452 mumps->id.isol_loc = isol_loc_save; 1453 1454 PetscCall(PetscFree2(sol_loc,isol_loc)); 1455 PetscCall(PetscFree(idxx)); 1456 PetscCall(VecDestroy(&msol_loc)); 1457 PetscCall(VecDestroy(&v_mpi)); 1458 if (!denseB) { 1459 if (!mumps->myid) { 1460 b = (Mat_MPIAIJ*)Bt->data; 1461 PetscCall(MatSeqAIJRestoreArray(b->A,&aa)); 1462 PetscCall(MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg)); 1463 PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 1464 } 1465 } else { 1466 if (mumps->ICNTL20 == 0) { 1467 PetscCall(VecDestroy(&b_seq)); 1468 PetscCall(VecScatterDestroy(&scat_rhs)); 1469 } 1470 } 1471 PetscCall(VecScatterDestroy(&scat_sol)); 1472 PetscCall(PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3))); 1473 PetscFunctionReturn(0); 1474 } 1475 1476 PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A,Mat B,Mat X) 1477 { 1478 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1479 PetscMUMPSInt oldvalue = mumps->id.ICNTL(9); 1480 1481 PetscFunctionBegin; 1482 mumps->id.ICNTL(9) = 0; 1483 PetscCall(MatMatSolve_MUMPS(A,B,X)); 1484 mumps->id.ICNTL(9) = oldvalue; 1485 PetscFunctionReturn(0); 1486 } 1487 1488 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X) 1489 { 1490 PetscBool flg; 1491 Mat B; 1492 1493 PetscFunctionBegin; 1494 PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL)); 1495 PetscCheck(flg,PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix"); 1496 1497 /* Create B=Bt^T that uses Bt's data structure */ 1498 PetscCall(MatCreateTranspose(Bt,&B)); 1499 1500 PetscCall(MatMatSolve_MUMPS(A,B,X)); 1501 PetscCall(MatDestroy(&B)); 1502 PetscFunctionReturn(0); 1503 } 1504 1505 #if !defined(PETSC_USE_COMPLEX) 1506 /* 1507 input: 1508 F: numeric factor 1509 output: 1510 nneg: total number of negative pivots 1511 nzero: total number of zero pivots 1512 npos: (global dimension of F) - nneg - nzero 1513 */ 1514 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos) 1515 { 1516 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1517 PetscMPIInt size; 1518 1519 PetscFunctionBegin; 1520 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F),&size)); 1521 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1522 PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia",mumps->id.INFOG(13)); 1523 1524 if (nneg) *nneg = mumps->id.INFOG(12); 1525 if (nzero || npos) { 1526 PetscCheck(mumps->id.ICNTL(24) == 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1527 if (nzero) *nzero = mumps->id.INFOG(28); 1528 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1529 } 1530 PetscFunctionReturn(0); 1531 } 1532 #endif 1533 1534 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps) 1535 { 1536 PetscInt i,nreqs; 1537 PetscMUMPSInt *irn,*jcn; 1538 PetscMPIInt count; 1539 PetscInt64 totnnz,remain; 1540 const PetscInt osize=mumps->omp_comm_size; 1541 PetscScalar *val; 1542 1543 PetscFunctionBegin; 1544 if (osize > 1) { 1545 if (reuse == MAT_INITIAL_MATRIX) { 1546 /* master first gathers counts of nonzeros to receive */ 1547 if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize,&mumps->recvcount)); 1548 PetscCallMPI(MPI_Gather(&mumps->nnz,1,MPIU_INT64,mumps->recvcount,1,MPIU_INT64,0/*master*/,mumps->omp_comm)); 1549 1550 /* Then each computes number of send/recvs */ 1551 if (mumps->is_omp_master) { 1552 /* Start from 1 since self communication is not done in MPI */ 1553 nreqs = 0; 1554 for (i=1; i<osize; i++) nreqs += (mumps->recvcount[i]+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX; 1555 } else { 1556 nreqs = (mumps->nnz+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX; 1557 } 1558 PetscCall(PetscMalloc1(nreqs*3,&mumps->reqs)); /* Triple the requests since we send irn, jcn and val seperately */ 1559 1560 /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 1561 MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 1562 might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size 1563 is very small, the current approach should have no extra overhead compared to MPI_Gatherv. 1564 */ 1565 nreqs = 0; /* counter for actual send/recvs */ 1566 if (mumps->is_omp_master) { 1567 for (i=0,totnnz=0; i<osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1568 PetscCall(PetscMalloc2(totnnz,&irn,totnnz,&jcn)); 1569 PetscCall(PetscMalloc1(totnnz,&val)); 1570 1571 /* Self communication */ 1572 PetscCall(PetscArraycpy(irn,mumps->irn,mumps->nnz)); 1573 PetscCall(PetscArraycpy(jcn,mumps->jcn,mumps->nnz)); 1574 PetscCall(PetscArraycpy(val,mumps->val,mumps->nnz)); 1575 1576 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1577 PetscCall(PetscFree2(mumps->irn,mumps->jcn)); 1578 PetscCall(PetscFree(mumps->val_alloc)); 1579 mumps->nnz = totnnz; 1580 mumps->irn = irn; 1581 mumps->jcn = jcn; 1582 mumps->val = mumps->val_alloc = val; 1583 1584 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1585 jcn += mumps->recvcount[0]; 1586 val += mumps->recvcount[0]; 1587 1588 /* Remote communication */ 1589 for (i=1; i<osize; i++) { 1590 count = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX); 1591 remain = mumps->recvcount[i] - count; 1592 while (count>0) { 1593 PetscCallMPI(MPI_Irecv(irn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1594 PetscCallMPI(MPI_Irecv(jcn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1595 PetscCallMPI(MPI_Irecv(val,count,MPIU_SCALAR, i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1596 irn += count; 1597 jcn += count; 1598 val += count; 1599 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1600 remain -= count; 1601 } 1602 } 1603 } else { 1604 irn = mumps->irn; 1605 jcn = mumps->jcn; 1606 val = mumps->val; 1607 count = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX); 1608 remain = mumps->nnz - count; 1609 while (count>0) { 1610 PetscCallMPI(MPI_Isend(irn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1611 PetscCallMPI(MPI_Isend(jcn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1612 PetscCallMPI(MPI_Isend(val,count,MPIU_SCALAR, 0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1613 irn += count; 1614 jcn += count; 1615 val += count; 1616 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1617 remain -= count; 1618 } 1619 } 1620 } else { 1621 nreqs = 0; 1622 if (mumps->is_omp_master) { 1623 val = mumps->val + mumps->recvcount[0]; 1624 for (i=1; i<osize; i++) { /* Remote communication only since self data is already in place */ 1625 count = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX); 1626 remain = mumps->recvcount[i] - count; 1627 while (count>0) { 1628 PetscCallMPI(MPI_Irecv(val,count,MPIU_SCALAR,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1629 val += count; 1630 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1631 remain -= count; 1632 } 1633 } 1634 } else { 1635 val = mumps->val; 1636 count = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX); 1637 remain = mumps->nnz - count; 1638 while (count>0) { 1639 PetscCallMPI(MPI_Isend(val,count,MPIU_SCALAR,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1640 val += count; 1641 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1642 remain -= count; 1643 } 1644 } 1645 } 1646 PetscCallMPI(MPI_Waitall(nreqs,mumps->reqs,MPI_STATUSES_IGNORE)); 1647 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1648 } 1649 PetscFunctionReturn(0); 1650 } 1651 1652 PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info) 1653 { 1654 Mat_MUMPS *mumps =(Mat_MUMPS*)(F)->data; 1655 PetscBool isMPIAIJ; 1656 1657 PetscFunctionBegin; 1658 if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) { 1659 if (mumps->id.INFOG(1) == -6) { 1660 PetscCall(PetscInfo(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2))); 1661 } 1662 PetscCall(PetscInfo(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2))); 1663 PetscFunctionReturn(0); 1664 } 1665 1666 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps)); 1667 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps)); 1668 1669 /* numerical factorization phase */ 1670 /*-------------------------------*/ 1671 mumps->id.job = JOB_FACTNUMERIC; 1672 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1673 if (!mumps->myid) { 1674 mumps->id.a = (MumpsScalar*)mumps->val; 1675 } 1676 } else { 1677 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1678 } 1679 PetscMUMPS_c(mumps); 1680 if (mumps->id.INFOG(1) < 0) { 1681 PetscCheck(!A->erroriffailure,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d",mumps->id.INFOG(1),mumps->id.INFO(2)); 1682 if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */ 1683 PetscCall(PetscInfo(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2))); 1684 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1685 } else if (mumps->id.INFOG(1) == -13) { 1686 PetscCall(PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2))); 1687 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1688 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 1689 PetscCall(PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray \n",mumps->id.INFOG(1),mumps->id.INFO(2))); 1690 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1691 } else { 1692 PetscCall(PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2))); 1693 F->factorerrortype = MAT_FACTOR_OTHER; 1694 } 1695 } 1696 PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0,PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d",mumps->id.INFOG(16)); 1697 1698 F->assembled = PETSC_TRUE; 1699 1700 if (F->schur) { /* reset Schur status to unfactored */ 1701 #if defined(PETSC_HAVE_CUDA) 1702 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 1703 #endif 1704 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1705 mumps->id.ICNTL(19) = 2; 1706 PetscCall(MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur)); 1707 } 1708 PetscCall(MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED)); 1709 } 1710 1711 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 1712 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 1713 1714 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 1715 if (mumps->petsc_size > 1) { 1716 PetscInt lsol_loc; 1717 PetscScalar *sol_loc; 1718 1719 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ)); 1720 1721 /* distributed solution; Create x_seq=sol_loc for repeated use */ 1722 if (mumps->x_seq) { 1723 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1724 PetscCall(PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc)); 1725 PetscCall(VecDestroy(&mumps->x_seq)); 1726 } 1727 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1728 PetscCall(PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc)); 1729 mumps->id.lsol_loc = lsol_loc; 1730 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1731 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq)); 1732 } 1733 PetscCall(PetscLogFlops(mumps->id.RINFO(2))); 1734 PetscFunctionReturn(0); 1735 } 1736 1737 /* Sets MUMPS options from the options database */ 1738 PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 1739 { 1740 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1741 PetscMUMPSInt icntl=0,size,*listvar_schur; 1742 PetscInt info[80],i,ninfo=80,rbs,cbs; 1743 PetscBool flg=PETSC_FALSE,schur=(PetscBool)(mumps->id.ICNTL(26) == -1); 1744 MumpsScalar *arr; 1745 1746 PetscFunctionBegin; 1747 PetscOptionsBegin(PetscObjectComm((PetscObject)F),((PetscObject)F)->prefix,"MUMPS Options","Mat"); 1748 if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 1749 PetscInt nthreads = 0; 1750 PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 1751 PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 1752 1753 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1754 PetscCallMPI(MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size)); 1755 PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm,&mumps->myid));/* "if (!myid)" still works even if mumps_comm is different */ 1756 1757 PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads","Convert MPI processes into OpenMP threads","None",&mumps->use_petsc_omp_support)); 1758 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1759 /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 1760 PetscCall(PetscOptionsGetInt(NULL,((PetscObject)F)->prefix,"-mat_mumps_use_omp_threads",&nthreads,NULL)); 1761 if (mumps->use_petsc_omp_support) { 1762 PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT),PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",((PetscObject)F)->prefix?((PetscObject)F)->prefix:""); 1763 PetscCheck(!schur,PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature",((PetscObject)F)->prefix?((PetscObject)F)->prefix:""); 1764 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1765 PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl)); 1766 PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master)); 1767 #endif 1768 } else { 1769 mumps->omp_comm = PETSC_COMM_SELF; 1770 mumps->mumps_comm = mumps->petsc_comm; 1771 mumps->is_omp_master = PETSC_TRUE; 1772 } 1773 PetscCallMPI(MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size)); 1774 mumps->reqs = NULL; 1775 mumps->tag = 0; 1776 1777 if (mumps->mumps_comm != MPI_COMM_NULL) { 1778 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 1779 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */ 1780 MPI_Comm comm; 1781 PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm,&comm)); 1782 mumps->mumps_comm = comm; 1783 } else PetscCall(PetscCommGetComm(mumps->petsc_comm,&mumps->mumps_comm)); 1784 } 1785 1786 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 1787 mumps->id.job = JOB_INIT; 1788 mumps->id.par = 1; /* host participates factorizaton and solve */ 1789 mumps->id.sym = mumps->sym; 1790 1791 size = mumps->id.size_schur; 1792 arr = mumps->id.schur; 1793 listvar_schur = mumps->id.listvar_schur; 1794 PetscMUMPS_c(mumps); 1795 PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS: INFOG(1)=%d",mumps->id.INFOG(1)); 1796 /* restore cached ICNTL and CNTL values */ 1797 for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1+2*icntl]) = mumps->ICNTL_pre[2+2*icntl]; 1798 for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1+2*icntl]) = mumps->CNTL_pre[2+2*icntl]; 1799 PetscCall(PetscFree(mumps->ICNTL_pre)); 1800 PetscCall(PetscFree(mumps->CNTL_pre)); 1801 1802 if (schur) { 1803 mumps->id.size_schur = size; 1804 mumps->id.schur_lld = size; 1805 mumps->id.schur = arr; 1806 mumps->id.listvar_schur = listvar_schur; 1807 if (mumps->petsc_size > 1) { 1808 PetscBool gs; /* gs is false if any rank other than root has non-empty IS */ 1809 1810 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 1811 gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 1812 PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm)); 1813 PetscCheck(gs,PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc"); 1814 } else { 1815 if (F->factortype == MAT_FACTOR_LU) { 1816 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 1817 } else { 1818 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 1819 } 1820 } 1821 mumps->id.ICNTL(26) = -1; 1822 } 1823 1824 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 1825 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 1826 */ 1827 PetscCallMPI(MPI_Bcast(mumps->id.icntl,40,MPI_INT, 0,mumps->omp_comm)); 1828 PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm)); 1829 1830 mumps->scat_rhs = NULL; 1831 mumps->scat_sol = NULL; 1832 1833 /* set PETSc-MUMPS default options - override MUMPS default */ 1834 mumps->id.ICNTL(3) = 0; 1835 mumps->id.ICNTL(4) = 0; 1836 if (mumps->petsc_size == 1) { 1837 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 1838 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 1839 } else { 1840 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 1841 mumps->id.ICNTL(21) = 1; /* distributed solution */ 1842 } 1843 } 1844 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg)); 1845 if (flg) mumps->id.ICNTL(1) = icntl; 1846 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg)); 1847 if (flg) mumps->id.ICNTL(2) = icntl; 1848 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg)); 1849 if (flg) mumps->id.ICNTL(3) = icntl; 1850 1851 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg)); 1852 if (flg) mumps->id.ICNTL(4) = icntl; 1853 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 1854 1855 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg)); 1856 if (flg) mumps->id.ICNTL(6) = icntl; 1857 1858 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)","None",mumps->id.ICNTL(7),&icntl,&flg)); 1859 if (flg) { 1860 PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 1861 mumps->id.ICNTL(7) = icntl; 1862 } 1863 1864 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL)); 1865 /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 1866 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL)); 1867 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL)); 1868 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL)); 1869 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL)); 1870 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL)); 1871 PetscCall(MatGetBlockSizes(A,&rbs,&cbs)); 1872 if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs; 1873 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15","ICNTL(15): compression of the input matrix resulting from a block format","None",mumps->id.ICNTL(15),&mumps->id.ICNTL(15),&flg)); 1874 if (flg) { 1875 PetscCheck(mumps->id.ICNTL(15) <= 0,PETSC_COMM_SELF,PETSC_ERR_SUP,"Positive -mat_mumps_icntl_15 not handled"); 1876 PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 1877 } 1878 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL)); 1879 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 1880 PetscCall(MatDestroy(&F->schur)); 1881 PetscCall(MatMumpsResetSchur_Private(mumps)); 1882 } 1883 1884 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 1885 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 1886 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 1887 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 1888 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 1889 In short, we could not use distributed RHS with MPICH until v4.0b1. 1890 */ 1891 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101)) 1892 mumps->ICNTL20 = 0; /* Centralized dense RHS*/ 1893 #else 1894 mumps->ICNTL20 = 10; /* Distributed dense RHS*/ 1895 #endif 1896 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20","ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides","None",mumps->ICNTL20,&mumps->ICNTL20,&flg)); 1897 PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0,PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10",(int)mumps->ICNTL20); 1898 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0) 1899 PetscCheck(!flg || mumps->ICNTL20 != 10,PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=10 is not supported before MUMPS-5.3.0"); 1900 #endif 1901 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */ 1902 1903 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL)); 1904 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL)); 1905 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL)); 1906 if (mumps->id.ICNTL(24)) { 1907 mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ 1908 } 1909 1910 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL)); 1911 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL)); 1912 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27","ICNTL(27): controls the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL)); 1913 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL)); 1914 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL)); 1915 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */ 1916 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL)); 1917 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */ 1918 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL)); 1919 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL)); 1920 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL)); 1921 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL)); 1922 1923 PetscCall(PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL)); 1924 PetscCall(PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL)); 1925 PetscCall(PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL)); 1926 PetscCall(PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL)); 1927 PetscCall(PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL)); 1928 PetscCall(PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL)); 1929 1930 PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL)); 1931 1932 PetscCall(PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL)); 1933 if (ninfo) { 1934 PetscCheck(ninfo <= 80,PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %" PetscInt_FMT " must <= 80",ninfo); 1935 PetscCall(PetscMalloc1(ninfo,&mumps->info)); 1936 mumps->ninfo = ninfo; 1937 for (i=0; i<ninfo; i++) { 1938 PetscCheck(info[i] >= 0 && info[i] <= 80,PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %" PetscInt_FMT " must between 1 and 80",ninfo); 1939 mumps->info[i] = info[i]; 1940 } 1941 } 1942 PetscOptionsEnd(); 1943 PetscFunctionReturn(0); 1944 } 1945 1946 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps) 1947 { 1948 PetscFunctionBegin; 1949 if (mumps->id.INFOG(1) < 0) { 1950 PetscCheck(!A->erroriffailure,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d",mumps->id.INFOG(1)); 1951 if (mumps->id.INFOG(1) == -6) { 1952 PetscCall(PetscInfo(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2))); 1953 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 1954 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 1955 PetscCall(PetscInfo(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2))); 1956 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1957 } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) { 1958 PetscCall(PetscInfo(F,"Empty matrix\n")); 1959 } else { 1960 PetscCall(PetscInfo(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2))); 1961 F->factorerrortype = MAT_FACTOR_OTHER; 1962 } 1963 } 1964 PetscFunctionReturn(0); 1965 } 1966 1967 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1968 { 1969 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1970 Vec b; 1971 const PetscInt M = A->rmap->N; 1972 1973 PetscFunctionBegin; 1974 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 1975 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 1976 PetscFunctionReturn(0); 1977 } 1978 1979 /* Set MUMPS options from the options database */ 1980 PetscCall(MatSetFromOptions_MUMPS(F,A)); 1981 1982 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 1983 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps)); 1984 1985 /* analysis phase */ 1986 /*----------------*/ 1987 mumps->id.job = JOB_FACTSYMBOLIC; 1988 mumps->id.n = M; 1989 switch (mumps->id.ICNTL(18)) { 1990 case 0: /* centralized assembled matrix input */ 1991 if (!mumps->myid) { 1992 mumps->id.nnz = mumps->nnz; 1993 mumps->id.irn = mumps->irn; 1994 mumps->id.jcn = mumps->jcn; 1995 if (mumps->id.ICNTL(6)>1) mumps->id.a = (MumpsScalar*)mumps->val; 1996 if (r) { 1997 mumps->id.ICNTL(7) = 1; 1998 if (!mumps->myid) { 1999 const PetscInt *idx; 2000 PetscInt i; 2001 2002 PetscCall(PetscMalloc1(M,&mumps->id.perm_in)); 2003 PetscCall(ISGetIndices(r,&idx)); 2004 for (i=0; i<M; i++) PetscCall(PetscMUMPSIntCast(idx[i]+1,&(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */ 2005 PetscCall(ISRestoreIndices(r,&idx)); 2006 } 2007 } 2008 } 2009 break; 2010 case 3: /* distributed assembled matrix input (size>1) */ 2011 mumps->id.nnz_loc = mumps->nnz; 2012 mumps->id.irn_loc = mumps->irn; 2013 mumps->id.jcn_loc = mumps->jcn; 2014 if (mumps->id.ICNTL(6)>1) mumps->id.a_loc = (MumpsScalar*)mumps->val; 2015 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2016 PetscCall(MatCreateVecs(A,NULL,&b)); 2017 PetscCall(VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq)); 2018 PetscCall(VecDestroy(&b)); 2019 } 2020 break; 2021 } 2022 PetscMUMPS_c(mumps); 2023 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps)); 2024 2025 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2026 F->ops->solve = MatSolve_MUMPS; 2027 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2028 F->ops->matsolve = MatMatSolve_MUMPS; 2029 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2030 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2031 2032 mumps->matstruc = SAME_NONZERO_PATTERN; 2033 PetscFunctionReturn(0); 2034 } 2035 2036 /* Note the Petsc r and c permutations are ignored */ 2037 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 2038 { 2039 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 2040 Vec b; 2041 const PetscInt M = A->rmap->N; 2042 2043 PetscFunctionBegin; 2044 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2045 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2046 PetscFunctionReturn(0); 2047 } 2048 2049 /* Set MUMPS options from the options database */ 2050 PetscCall(MatSetFromOptions_MUMPS(F,A)); 2051 2052 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2053 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps)); 2054 2055 /* analysis phase */ 2056 /*----------------*/ 2057 mumps->id.job = JOB_FACTSYMBOLIC; 2058 mumps->id.n = M; 2059 switch (mumps->id.ICNTL(18)) { 2060 case 0: /* centralized assembled matrix input */ 2061 if (!mumps->myid) { 2062 mumps->id.nnz = mumps->nnz; 2063 mumps->id.irn = mumps->irn; 2064 mumps->id.jcn = mumps->jcn; 2065 if (mumps->id.ICNTL(6)>1) { 2066 mumps->id.a = (MumpsScalar*)mumps->val; 2067 } 2068 } 2069 break; 2070 case 3: /* distributed assembled matrix input (size>1) */ 2071 mumps->id.nnz_loc = mumps->nnz; 2072 mumps->id.irn_loc = mumps->irn; 2073 mumps->id.jcn_loc = mumps->jcn; 2074 if (mumps->id.ICNTL(6)>1) { 2075 mumps->id.a_loc = (MumpsScalar*)mumps->val; 2076 } 2077 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2078 PetscCall(MatCreateVecs(A,NULL,&b)); 2079 PetscCall(VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq)); 2080 PetscCall(VecDestroy(&b)); 2081 } 2082 break; 2083 } 2084 PetscMUMPS_c(mumps); 2085 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps)); 2086 2087 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2088 F->ops->solve = MatSolve_MUMPS; 2089 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2090 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2091 2092 mumps->matstruc = SAME_NONZERO_PATTERN; 2093 PetscFunctionReturn(0); 2094 } 2095 2096 /* Note the Petsc r permutation and factor info are ignored */ 2097 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info) 2098 { 2099 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 2100 Vec b; 2101 const PetscInt M = A->rmap->N; 2102 2103 PetscFunctionBegin; 2104 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2105 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2106 PetscFunctionReturn(0); 2107 } 2108 2109 /* Set MUMPS options from the options database */ 2110 PetscCall(MatSetFromOptions_MUMPS(F,A)); 2111 2112 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2113 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps)); 2114 2115 /* analysis phase */ 2116 /*----------------*/ 2117 mumps->id.job = JOB_FACTSYMBOLIC; 2118 mumps->id.n = M; 2119 switch (mumps->id.ICNTL(18)) { 2120 case 0: /* centralized assembled matrix input */ 2121 if (!mumps->myid) { 2122 mumps->id.nnz = mumps->nnz; 2123 mumps->id.irn = mumps->irn; 2124 mumps->id.jcn = mumps->jcn; 2125 if (mumps->id.ICNTL(6)>1) { 2126 mumps->id.a = (MumpsScalar*)mumps->val; 2127 } 2128 } 2129 break; 2130 case 3: /* distributed assembled matrix input (size>1) */ 2131 mumps->id.nnz_loc = mumps->nnz; 2132 mumps->id.irn_loc = mumps->irn; 2133 mumps->id.jcn_loc = mumps->jcn; 2134 if (mumps->id.ICNTL(6)>1) { 2135 mumps->id.a_loc = (MumpsScalar*)mumps->val; 2136 } 2137 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2138 PetscCall(MatCreateVecs(A,NULL,&b)); 2139 PetscCall(VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq)); 2140 PetscCall(VecDestroy(&b)); 2141 } 2142 break; 2143 } 2144 PetscMUMPS_c(mumps); 2145 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps)); 2146 2147 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2148 F->ops->solve = MatSolve_MUMPS; 2149 F->ops->solvetranspose = MatSolve_MUMPS; 2150 F->ops->matsolve = MatMatSolve_MUMPS; 2151 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2152 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2153 #if defined(PETSC_USE_COMPLEX) 2154 F->ops->getinertia = NULL; 2155 #else 2156 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2157 #endif 2158 2159 mumps->matstruc = SAME_NONZERO_PATTERN; 2160 PetscFunctionReturn(0); 2161 } 2162 2163 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) 2164 { 2165 PetscBool iascii; 2166 PetscViewerFormat format; 2167 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 2168 2169 PetscFunctionBegin; 2170 /* check if matrix is mumps type */ 2171 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0); 2172 2173 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 2174 if (iascii) { 2175 PetscCall(PetscViewerGetFormat(viewer,&format)); 2176 if (format == PETSC_VIEWER_ASCII_INFO) { 2177 PetscCall(PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n")); 2178 PetscCall(PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d\n",mumps->id.sym)); 2179 PetscCall(PetscViewerASCIIPrintf(viewer," PAR (host participation): %d\n",mumps->id.par)); 2180 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d\n",mumps->id.ICNTL(1))); 2181 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d\n",mumps->id.ICNTL(2))); 2182 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d\n",mumps->id.ICNTL(3))); 2183 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d\n",mumps->id.ICNTL(4))); 2184 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d\n",mumps->id.ICNTL(5))); 2185 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d\n",mumps->id.ICNTL(6))); 2186 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d\n",mumps->id.ICNTL(7))); 2187 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d\n",mumps->id.ICNTL(8))); 2188 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d\n",mumps->id.ICNTL(10))); 2189 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d\n",mumps->id.ICNTL(11))); 2190 if (mumps->id.ICNTL(11)>0) { 2191 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4))); 2192 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5))); 2193 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6))); 2194 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8))); 2195 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9))); 2196 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11))); 2197 } 2198 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d\n",mumps->id.ICNTL(12))); 2199 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(13) (sequential factorization of the root node): %d\n",mumps->id.ICNTL(13))); 2200 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d\n",mumps->id.ICNTL(14))); 2201 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(15) (compression of the input matrix): %d\n",mumps->id.ICNTL(15))); 2202 /* ICNTL(15-17) not used */ 2203 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d\n",mumps->id.ICNTL(18))); 2204 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d\n",mumps->id.ICNTL(19))); 2205 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(20) (RHS sparse pattern): %d\n",mumps->id.ICNTL(20))); 2206 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d\n",mumps->id.ICNTL(21))); 2207 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d\n",mumps->id.ICNTL(22))); 2208 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory can be allocated locally):%d\n",mumps->id.ICNTL(23))); 2209 2210 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d\n",mumps->id.ICNTL(24))); 2211 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d\n",mumps->id.ICNTL(25))); 2212 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for RHS or solution): %d\n",mumps->id.ICNTL(26))); 2213 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(27) (blocking size for multiple RHS): %d\n",mumps->id.ICNTL(27))); 2214 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d\n",mumps->id.ICNTL(28))); 2215 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d\n",mumps->id.ICNTL(29))); 2216 2217 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d\n",mumps->id.ICNTL(30))); 2218 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(31) (factors is discarded in the solve phase): %d\n",mumps->id.ICNTL(31))); 2219 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d\n",mumps->id.ICNTL(33))); 2220 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d\n",mumps->id.ICNTL(35))); 2221 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(36) (choice of BLR factorization variant): %d\n",mumps->id.ICNTL(36))); 2222 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(38) (estimated compression rate of LU factors): %d\n",mumps->id.ICNTL(38))); 2223 2224 PetscCall(PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1))); 2225 PetscCall(PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2))); 2226 PetscCall(PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3))); 2227 PetscCall(PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4))); 2228 PetscCall(PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5))); 2229 PetscCall(PetscViewerASCIIPrintf(viewer," CNTL(7) (dropping parameter for BLR): %g \n",mumps->id.CNTL(7))); 2230 2231 /* information local to each processor */ 2232 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n")); 2233 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 2234 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(1))); 2235 PetscCall(PetscViewerFlush(viewer)); 2236 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n")); 2237 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(2))); 2238 PetscCall(PetscViewerFlush(viewer)); 2239 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n")); 2240 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(3))); 2241 PetscCall(PetscViewerFlush(viewer)); 2242 2243 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n")); 2244 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(15))); 2245 PetscCall(PetscViewerFlush(viewer)); 2246 2247 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n")); 2248 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(16))); 2249 PetscCall(PetscViewerFlush(viewer)); 2250 2251 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n")); 2252 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(23))); 2253 PetscCall(PetscViewerFlush(viewer)); 2254 2255 if (mumps->ninfo && mumps->ninfo <= 80) { 2256 PetscInt i; 2257 for (i=0; i<mumps->ninfo; i++) { 2258 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "): \n",mumps->info[i])); 2259 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(mumps->info[i]))); 2260 PetscCall(PetscViewerFlush(viewer)); 2261 } 2262 } 2263 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 2264 2265 if (!mumps->myid) { /* information from the host */ 2266 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1))); 2267 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2))); 2268 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3))); 2269 PetscCall(PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34))); 2270 2271 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(3))); 2272 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(4))); 2273 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d\n",mumps->id.INFOG(5))); 2274 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d\n",mumps->id.INFOG(6))); 2275 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively used after analysis): %d\n",mumps->id.INFOG(7))); 2276 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n",mumps->id.INFOG(8))); 2277 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n",mumps->id.INFOG(9))); 2278 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d\n",mumps->id.INFOG(10))); 2279 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d\n",mumps->id.INFOG(11))); 2280 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d\n",mumps->id.INFOG(12))); 2281 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d\n",mumps->id.INFOG(13))); 2282 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d\n",mumps->id.INFOG(14))); 2283 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d\n",mumps->id.INFOG(15))); 2284 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n",mumps->id.INFOG(16))); 2285 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n",mumps->id.INFOG(17))); 2286 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n",mumps->id.INFOG(18))); 2287 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n",mumps->id.INFOG(19))); 2288 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d\n",mumps->id.INFOG(20))); 2289 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n",mumps->id.INFOG(21))); 2290 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n",mumps->id.INFOG(22))); 2291 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n",mumps->id.INFOG(23))); 2292 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n",mumps->id.INFOG(24))); 2293 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n",mumps->id.INFOG(25))); 2294 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28))); 2295 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29))); 2296 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31))); 2297 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32))); 2298 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33))); 2299 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34))); 2300 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35))); 2301 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(36))); 2302 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n",mumps->id.INFOG(37))); 2303 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(38))); 2304 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n",mumps->id.INFOG(39))); 2305 } 2306 } 2307 } 2308 PetscFunctionReturn(0); 2309 } 2310 2311 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info) 2312 { 2313 Mat_MUMPS *mumps =(Mat_MUMPS*)A->data; 2314 2315 PetscFunctionBegin; 2316 info->block_size = 1.0; 2317 info->nz_allocated = mumps->id.INFOG(20); 2318 info->nz_used = mumps->id.INFOG(20); 2319 info->nz_unneeded = 0.0; 2320 info->assemblies = 0.0; 2321 info->mallocs = 0.0; 2322 info->memory = 0.0; 2323 info->fill_ratio_given = 0; 2324 info->fill_ratio_needed = 0; 2325 info->factor_mallocs = 0; 2326 PetscFunctionReturn(0); 2327 } 2328 2329 /* -------------------------------------------------------------------------------------------*/ 2330 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 2331 { 2332 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2333 const PetscScalar *arr; 2334 const PetscInt *idxs; 2335 PetscInt size,i; 2336 2337 PetscFunctionBegin; 2338 PetscCall(ISGetLocalSize(is,&size)); 2339 /* Schur complement matrix */ 2340 PetscCall(MatDestroy(&F->schur)); 2341 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur)); 2342 PetscCall(MatDenseGetArrayRead(F->schur,&arr)); 2343 mumps->id.schur = (MumpsScalar*)arr; 2344 mumps->id.size_schur = size; 2345 mumps->id.schur_lld = size; 2346 PetscCall(MatDenseRestoreArrayRead(F->schur,&arr)); 2347 if (mumps->sym == 1) { 2348 PetscCall(MatSetOption(F->schur,MAT_SPD,PETSC_TRUE)); 2349 } 2350 2351 /* MUMPS expects Fortran style indices */ 2352 PetscCall(PetscFree(mumps->id.listvar_schur)); 2353 PetscCall(PetscMalloc1(size,&mumps->id.listvar_schur)); 2354 PetscCall(ISGetIndices(is,&idxs)); 2355 for (i=0; i<size; i++) PetscCall(PetscMUMPSIntCast(idxs[i]+1,&(mumps->id.listvar_schur[i]))); 2356 PetscCall(ISRestoreIndices(is,&idxs)); 2357 /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 2358 mumps->id.ICNTL(26) = -1; 2359 PetscFunctionReturn(0); 2360 } 2361 2362 /* -------------------------------------------------------------------------------------------*/ 2363 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S) 2364 { 2365 Mat St; 2366 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2367 PetscScalar *array; 2368 #if defined(PETSC_USE_COMPLEX) 2369 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 2370 #endif 2371 2372 PetscFunctionBegin; 2373 PetscCheck(mumps->id.ICNTL(19),PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 2374 PetscCall(MatCreate(PETSC_COMM_SELF,&St)); 2375 PetscCall(MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur)); 2376 PetscCall(MatSetType(St,MATDENSE)); 2377 PetscCall(MatSetUp(St)); 2378 PetscCall(MatDenseGetArray(St,&array)); 2379 if (!mumps->sym) { /* MUMPS always return a full matrix */ 2380 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2381 PetscInt i,j,N=mumps->id.size_schur; 2382 for (i=0;i<N;i++) { 2383 for (j=0;j<N;j++) { 2384 #if !defined(PETSC_USE_COMPLEX) 2385 PetscScalar val = mumps->id.schur[i*N+j]; 2386 #else 2387 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2388 #endif 2389 array[j*N+i] = val; 2390 } 2391 } 2392 } else { /* stored by columns */ 2393 PetscCall(PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur)); 2394 } 2395 } else { /* either full or lower-triangular (not packed) */ 2396 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2397 PetscInt i,j,N=mumps->id.size_schur; 2398 for (i=0;i<N;i++) { 2399 for (j=i;j<N;j++) { 2400 #if !defined(PETSC_USE_COMPLEX) 2401 PetscScalar val = mumps->id.schur[i*N+j]; 2402 #else 2403 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2404 #endif 2405 array[i*N+j] = val; 2406 array[j*N+i] = val; 2407 } 2408 } 2409 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2410 PetscCall(PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur)); 2411 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2412 PetscInt i,j,N=mumps->id.size_schur; 2413 for (i=0;i<N;i++) { 2414 for (j=0;j<i+1;j++) { 2415 #if !defined(PETSC_USE_COMPLEX) 2416 PetscScalar val = mumps->id.schur[i*N+j]; 2417 #else 2418 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2419 #endif 2420 array[i*N+j] = val; 2421 array[j*N+i] = val; 2422 } 2423 } 2424 } 2425 } 2426 PetscCall(MatDenseRestoreArray(St,&array)); 2427 *S = St; 2428 PetscFunctionReturn(0); 2429 } 2430 2431 /* -------------------------------------------------------------------------------------------*/ 2432 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival) 2433 { 2434 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2435 2436 PetscFunctionBegin; 2437 if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called */ 2438 PetscInt i,nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */ 2439 for (i = 0; i < nICNTL_pre; ++i) if (mumps->ICNTL_pre[1+2*i] == icntl) break; /* is this ICNTL already cached? */ 2440 if (i == nICNTL_pre) { /* not already cached */ 2441 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt)*(2*nICNTL_pre + 3),&mumps->ICNTL_pre)); 2442 else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt)*3,&mumps->ICNTL_pre)); 2443 mumps->ICNTL_pre[0]++; 2444 } 2445 mumps->ICNTL_pre[1+2*i] = icntl; 2446 PetscCall(PetscMUMPSIntCast(ival,mumps->ICNTL_pre+2+2*i)); 2447 } else PetscCall(PetscMUMPSIntCast(ival,&mumps->id.ICNTL(icntl))); 2448 PetscFunctionReturn(0); 2449 } 2450 2451 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival) 2452 { 2453 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2454 2455 PetscFunctionBegin; 2456 *ival = mumps->id.ICNTL(icntl); 2457 PetscFunctionReturn(0); 2458 } 2459 2460 /*@ 2461 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2462 2463 Logically Collective on Mat 2464 2465 Input Parameters: 2466 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2467 . icntl - index of MUMPS parameter array ICNTL() 2468 - ival - value of MUMPS ICNTL(icntl) 2469 2470 Options Database: 2471 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival 2472 2473 Level: beginner 2474 2475 References: 2476 . * - MUMPS Users' Guide 2477 2478 .seealso: `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2479 @*/ 2480 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival) 2481 { 2482 PetscFunctionBegin; 2483 PetscValidType(F,1); 2484 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2485 PetscValidLogicalCollectiveInt(F,icntl,2); 2486 PetscValidLogicalCollectiveInt(F,ival,3); 2487 PetscCheck(icntl >= 1 && icntl <= 38,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONG,"Unsupported ICNTL value %" PetscInt_FMT,icntl); 2488 PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival)); 2489 PetscFunctionReturn(0); 2490 } 2491 2492 /*@ 2493 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2494 2495 Logically Collective on Mat 2496 2497 Input Parameters: 2498 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2499 - icntl - index of MUMPS parameter array ICNTL() 2500 2501 Output Parameter: 2502 . ival - value of MUMPS ICNTL(icntl) 2503 2504 Level: beginner 2505 2506 References: 2507 . * - MUMPS Users' Guide 2508 2509 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2510 @*/ 2511 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival) 2512 { 2513 PetscFunctionBegin; 2514 PetscValidType(F,1); 2515 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2516 PetscValidLogicalCollectiveInt(F,icntl,2); 2517 PetscValidIntPointer(ival,3); 2518 PetscCheck(icntl >= 1 && icntl <= 38,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONG,"Unsupported ICNTL value %" PetscInt_FMT,icntl); 2519 PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival)); 2520 PetscFunctionReturn(0); 2521 } 2522 2523 /* -------------------------------------------------------------------------------------------*/ 2524 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val) 2525 { 2526 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2527 2528 PetscFunctionBegin; 2529 if (mumps->id.job == JOB_NULL) { 2530 PetscInt i,nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2531 for (i = 0; i < nCNTL_pre; ++i) if (mumps->CNTL_pre[1+2*i] == icntl) break; 2532 if (i == nCNTL_pre) { 2533 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal)*(2*nCNTL_pre + 3),&mumps->CNTL_pre)); 2534 else PetscCall(PetscCalloc(sizeof(PetscReal)*3,&mumps->CNTL_pre)); 2535 mumps->CNTL_pre[0]++; 2536 } 2537 mumps->CNTL_pre[1+2*i] = icntl; 2538 mumps->CNTL_pre[2+2*i] = val; 2539 } else mumps->id.CNTL(icntl) = val; 2540 PetscFunctionReturn(0); 2541 } 2542 2543 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val) 2544 { 2545 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2546 2547 PetscFunctionBegin; 2548 *val = mumps->id.CNTL(icntl); 2549 PetscFunctionReturn(0); 2550 } 2551 2552 /*@ 2553 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2554 2555 Logically Collective on Mat 2556 2557 Input Parameters: 2558 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2559 . icntl - index of MUMPS parameter array CNTL() 2560 - val - value of MUMPS CNTL(icntl) 2561 2562 Options Database: 2563 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival 2564 2565 Level: beginner 2566 2567 References: 2568 . * - MUMPS Users' Guide 2569 2570 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2571 @*/ 2572 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val) 2573 { 2574 PetscFunctionBegin; 2575 PetscValidType(F,1); 2576 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2577 PetscValidLogicalCollectiveInt(F,icntl,2); 2578 PetscValidLogicalCollectiveReal(F,val,3); 2579 PetscCheck(icntl >= 1 && icntl <= 7,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONG,"Unsupported CNTL value %" PetscInt_FMT,icntl); 2580 PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val)); 2581 PetscFunctionReturn(0); 2582 } 2583 2584 /*@ 2585 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2586 2587 Logically Collective on Mat 2588 2589 Input Parameters: 2590 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2591 - icntl - index of MUMPS parameter array CNTL() 2592 2593 Output Parameter: 2594 . val - value of MUMPS CNTL(icntl) 2595 2596 Level: beginner 2597 2598 References: 2599 . * - MUMPS Users' Guide 2600 2601 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2602 @*/ 2603 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val) 2604 { 2605 PetscFunctionBegin; 2606 PetscValidType(F,1); 2607 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2608 PetscValidLogicalCollectiveInt(F,icntl,2); 2609 PetscValidRealPointer(val,3); 2610 PetscCheck(icntl >= 1 && icntl <= 7,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONG,"Unsupported CNTL value %" PetscInt_FMT,icntl); 2611 PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val)); 2612 PetscFunctionReturn(0); 2613 } 2614 2615 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info) 2616 { 2617 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2618 2619 PetscFunctionBegin; 2620 *info = mumps->id.INFO(icntl); 2621 PetscFunctionReturn(0); 2622 } 2623 2624 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog) 2625 { 2626 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2627 2628 PetscFunctionBegin; 2629 *infog = mumps->id.INFOG(icntl); 2630 PetscFunctionReturn(0); 2631 } 2632 2633 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo) 2634 { 2635 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2636 2637 PetscFunctionBegin; 2638 *rinfo = mumps->id.RINFO(icntl); 2639 PetscFunctionReturn(0); 2640 } 2641 2642 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog) 2643 { 2644 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2645 2646 PetscFunctionBegin; 2647 *rinfog = mumps->id.RINFOG(icntl); 2648 PetscFunctionReturn(0); 2649 } 2650 2651 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS) 2652 { 2653 Mat Bt = NULL,Btseq = NULL; 2654 PetscBool flg; 2655 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2656 PetscScalar *aa; 2657 PetscInt spnr,*ia,*ja,M,nrhs; 2658 2659 PetscFunctionBegin; 2660 PetscValidPointer(spRHS,2); 2661 PetscCall(PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg)); 2662 if (flg) { 2663 PetscCall(MatTransposeGetMat(spRHS,&Bt)); 2664 } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be type MATTRANSPOSEMAT matrix"); 2665 2666 PetscCall(MatMumpsSetIcntl(F,30,1)); 2667 2668 if (mumps->petsc_size > 1) { 2669 Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data; 2670 Btseq = b->A; 2671 } else { 2672 Btseq = Bt; 2673 } 2674 2675 PetscCall(MatGetSize(spRHS,&M,&nrhs)); 2676 mumps->id.nrhs = nrhs; 2677 mumps->id.lrhs = M; 2678 mumps->id.rhs = NULL; 2679 2680 if (!mumps->myid) { 2681 PetscCall(MatSeqAIJGetArray(Btseq,&aa)); 2682 PetscCall(MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg)); 2683 PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2684 PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs)); 2685 mumps->id.rhs_sparse = (MumpsScalar*)aa; 2686 } else { 2687 mumps->id.irhs_ptr = NULL; 2688 mumps->id.irhs_sparse = NULL; 2689 mumps->id.nz_rhs = 0; 2690 mumps->id.rhs_sparse = NULL; 2691 } 2692 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2693 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2694 2695 /* solve phase */ 2696 /*-------------*/ 2697 mumps->id.job = JOB_SOLVE; 2698 PetscMUMPS_c(mumps); 2699 PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d",mumps->id.INFOG(1),mumps->id.INFO(2)); 2700 2701 if (!mumps->myid) { 2702 PetscCall(MatSeqAIJRestoreArray(Btseq,&aa)); 2703 PetscCall(MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg)); 2704 PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2705 } 2706 PetscFunctionReturn(0); 2707 } 2708 2709 /*@ 2710 MatMumpsGetInverse - Get user-specified set of entries in inverse of A 2711 2712 Logically Collective on Mat 2713 2714 Input Parameters: 2715 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2716 - spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0] 2717 2718 Output Parameter: 2719 . spRHS - requested entries of inverse of A 2720 2721 Level: beginner 2722 2723 References: 2724 . * - MUMPS Users' Guide 2725 2726 .seealso: `MatGetFactor()`, `MatCreateTranspose()` 2727 @*/ 2728 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS) 2729 { 2730 PetscFunctionBegin; 2731 PetscValidType(F,1); 2732 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2733 PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS)); 2734 PetscFunctionReturn(0); 2735 } 2736 2737 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST) 2738 { 2739 Mat spRHS; 2740 2741 PetscFunctionBegin; 2742 PetscCall(MatCreateTranspose(spRHST,&spRHS)); 2743 PetscCall(MatMumpsGetInverse_MUMPS(F,spRHS)); 2744 PetscCall(MatDestroy(&spRHS)); 2745 PetscFunctionReturn(0); 2746 } 2747 2748 /*@ 2749 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T 2750 2751 Logically Collective on Mat 2752 2753 Input Parameters: 2754 + F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface 2755 - spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0] 2756 2757 Output Parameter: 2758 . spRHST - requested entries of inverse of A^T 2759 2760 Level: beginner 2761 2762 References: 2763 . * - MUMPS Users' Guide 2764 2765 .seealso: `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 2766 @*/ 2767 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST) 2768 { 2769 PetscBool flg; 2770 2771 PetscFunctionBegin; 2772 PetscValidType(F,1); 2773 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2774 PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL)); 2775 PetscCheck(flg,PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix"); 2776 2777 PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST)); 2778 PetscFunctionReturn(0); 2779 } 2780 2781 /*@ 2782 MatMumpsGetInfo - Get MUMPS parameter INFO() 2783 2784 Logically Collective on Mat 2785 2786 Input Parameters: 2787 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2788 - icntl - index of MUMPS parameter array INFO() 2789 2790 Output Parameter: 2791 . ival - value of MUMPS INFO(icntl) 2792 2793 Level: beginner 2794 2795 References: 2796 . * - MUMPS Users' Guide 2797 2798 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2799 @*/ 2800 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival) 2801 { 2802 PetscFunctionBegin; 2803 PetscValidType(F,1); 2804 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2805 PetscValidIntPointer(ival,3); 2806 PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival)); 2807 PetscFunctionReturn(0); 2808 } 2809 2810 /*@ 2811 MatMumpsGetInfog - Get MUMPS parameter INFOG() 2812 2813 Logically Collective on Mat 2814 2815 Input Parameters: 2816 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2817 - icntl - index of MUMPS parameter array INFOG() 2818 2819 Output Parameter: 2820 . ival - value of MUMPS INFOG(icntl) 2821 2822 Level: beginner 2823 2824 References: 2825 . * - MUMPS Users' Guide 2826 2827 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2828 @*/ 2829 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival) 2830 { 2831 PetscFunctionBegin; 2832 PetscValidType(F,1); 2833 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2834 PetscValidIntPointer(ival,3); 2835 PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival)); 2836 PetscFunctionReturn(0); 2837 } 2838 2839 /*@ 2840 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 2841 2842 Logically Collective on Mat 2843 2844 Input Parameters: 2845 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2846 - icntl - index of MUMPS parameter array RINFO() 2847 2848 Output Parameter: 2849 . val - value of MUMPS RINFO(icntl) 2850 2851 Level: beginner 2852 2853 References: 2854 . * - MUMPS Users' Guide 2855 2856 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()` 2857 @*/ 2858 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val) 2859 { 2860 PetscFunctionBegin; 2861 PetscValidType(F,1); 2862 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2863 PetscValidRealPointer(val,3); 2864 PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val)); 2865 PetscFunctionReturn(0); 2866 } 2867 2868 /*@ 2869 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 2870 2871 Logically Collective on Mat 2872 2873 Input Parameters: 2874 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2875 - icntl - index of MUMPS parameter array RINFOG() 2876 2877 Output Parameter: 2878 . val - value of MUMPS RINFOG(icntl) 2879 2880 Level: beginner 2881 2882 References: 2883 . * - MUMPS Users' Guide 2884 2885 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 2886 @*/ 2887 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val) 2888 { 2889 PetscFunctionBegin; 2890 PetscValidType(F,1); 2891 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2892 PetscValidRealPointer(val,3); 2893 PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val)); 2894 PetscFunctionReturn(0); 2895 } 2896 2897 /*MC 2898 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 2899 distributed and sequential matrices via the external package MUMPS. 2900 2901 Works with MATAIJ and MATSBAIJ matrices 2902 2903 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 2904 2905 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below. 2906 2907 Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver 2908 2909 Options Database Keys: 2910 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 2911 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 2912 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 2913 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 2914 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 2915 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 2916 Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 2917 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 2918 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 2919 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 2920 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 2921 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 2922 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 2923 . -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format 2924 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 2925 . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS 2926 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 2927 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can allocate per processor 2928 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 2929 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 2930 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix 2931 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering 2932 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 2933 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 2934 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 2935 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 2936 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 2937 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 2938 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 2939 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 2940 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 2941 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 2942 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 2943 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 2944 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 2945 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 2946 Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 2947 2948 Level: beginner 2949 2950 Notes: 2951 MUMPS Cholesky does not handle (complex) Hermitian matrices http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf so using it will error if the matrix is Hermitian. 2952 2953 When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling 2954 `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix. 2955 2956 When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling 2957 $ KSPGetPC(ksp,&pc); 2958 $ PCFactorGetMatrix(pc,&mat); 2959 $ MatMumpsGetInfo(mat,....); 2960 $ MatMumpsGetInfog(mat,....); etc. 2961 Or you can run with -ksp_error_if_not_converged and the program will be stopped and the information printed in the error message. 2962 2963 Using MUMPS with 64-bit integers 2964 MUMPS provides 64-bit integer support in two build modes: 2965 full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and 2966 requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI). 2967 2968 selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices, 2969 MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and 2970 columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit 2971 integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS. 2972 2973 With --download-mumps=1, PETSc always build MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc. 2974 2975 Two modes to run MUMPS/PETSc with OpenMP 2976 $ Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 2977 $ threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". 2978 2979 $ -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example, 2980 $ if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 2981 2982 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 2983 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc 2984 (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 2985 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 2986 (PETSc will automatically try to utilized a threaded BLAS if --with-openmp is provided). 2987 2988 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 2989 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 2990 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 2991 are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set 2992 by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs. 2993 In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets, 2994 if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind 2995 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half 2996 cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 2997 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 2998 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 2999 examine the mapping result. 3000 3001 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts, 3002 for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc 3003 calls omp_set_num_threads(m) internally before calling MUMPS. 3004 3005 References: 3006 + * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 3007 - * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017. 3008 3009 .seealso: `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()` 3010 3011 M*/ 3012 3013 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type) 3014 { 3015 PetscFunctionBegin; 3016 *type = MATSOLVERMUMPS; 3017 PetscFunctionReturn(0); 3018 } 3019 3020 /* MatGetFactor for Seq and MPI AIJ matrices */ 3021 static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F) 3022 { 3023 Mat B; 3024 Mat_MUMPS *mumps; 3025 PetscBool isSeqAIJ; 3026 PetscMPIInt size; 3027 3028 PetscFunctionBegin; 3029 #if defined(PETSC_USE_COMPLEX) 3030 PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || ftype != MAT_FACTOR_CHOLESKY,PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported"); 3031 #endif 3032 /* Create the factorization matrix */ 3033 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ)); 3034 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 3035 PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 3036 PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name)); 3037 PetscCall(MatSetUp(B)); 3038 3039 PetscCall(PetscNewLog(B,&mumps)); 3040 3041 B->ops->view = MatView_MUMPS; 3042 B->ops->getinfo = MatGetInfo_MUMPS; 3043 3044 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps)); 3045 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS)); 3046 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS)); 3047 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS)); 3048 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS)); 3049 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS)); 3050 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS)); 3051 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS)); 3052 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS)); 3053 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS)); 3054 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS)); 3055 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS)); 3056 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS)); 3057 3058 if (ftype == MAT_FACTOR_LU) { 3059 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3060 B->factortype = MAT_FACTOR_LU; 3061 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3062 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 3063 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU])); 3064 mumps->sym = 0; 3065 } else { 3066 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3067 B->factortype = MAT_FACTOR_CHOLESKY; 3068 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3069 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 3070 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3071 #if defined(PETSC_USE_COMPLEX) 3072 mumps->sym = 2; 3073 #else 3074 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3075 else mumps->sym = 2; 3076 #endif 3077 } 3078 3079 /* set solvertype */ 3080 PetscCall(PetscFree(B->solvertype)); 3081 PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype)); 3082 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 3083 if (size == 1) { 3084 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3085 B->canuseordering = PETSC_TRUE; 3086 } 3087 B->ops->destroy = MatDestroy_MUMPS; 3088 B->data = (void*)mumps; 3089 3090 *F = B; 3091 mumps->id.job = JOB_NULL; 3092 mumps->ICNTL_pre = NULL; 3093 mumps->CNTL_pre = NULL; 3094 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3095 PetscFunctionReturn(0); 3096 } 3097 3098 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3099 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F) 3100 { 3101 Mat B; 3102 Mat_MUMPS *mumps; 3103 PetscBool isSeqSBAIJ; 3104 PetscMPIInt size; 3105 3106 PetscFunctionBegin; 3107 #if defined(PETSC_USE_COMPLEX) 3108 PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE,PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported"); 3109 #endif 3110 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 3111 PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 3112 PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name)); 3113 PetscCall(MatSetUp(B)); 3114 3115 PetscCall(PetscNewLog(B,&mumps)); 3116 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ)); 3117 if (isSeqSBAIJ) { 3118 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3119 } else { 3120 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3121 } 3122 3123 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3124 B->ops->view = MatView_MUMPS; 3125 B->ops->getinfo = MatGetInfo_MUMPS; 3126 3127 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps)); 3128 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS)); 3129 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS)); 3130 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS)); 3131 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS)); 3132 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS)); 3133 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS)); 3134 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS)); 3135 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS)); 3136 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS)); 3137 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS)); 3138 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS)); 3139 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS)); 3140 3141 B->factortype = MAT_FACTOR_CHOLESKY; 3142 #if defined(PETSC_USE_COMPLEX) 3143 mumps->sym = 2; 3144 #else 3145 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3146 else mumps->sym = 2; 3147 #endif 3148 3149 /* set solvertype */ 3150 PetscCall(PetscFree(B->solvertype)); 3151 PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype)); 3152 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 3153 if (size == 1) { 3154 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3155 B->canuseordering = PETSC_TRUE; 3156 } 3157 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3158 B->ops->destroy = MatDestroy_MUMPS; 3159 B->data = (void*)mumps; 3160 3161 *F = B; 3162 mumps->id.job = JOB_NULL; 3163 mumps->ICNTL_pre = NULL; 3164 mumps->CNTL_pre = NULL; 3165 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3166 PetscFunctionReturn(0); 3167 } 3168 3169 static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F) 3170 { 3171 Mat B; 3172 Mat_MUMPS *mumps; 3173 PetscBool isSeqBAIJ; 3174 PetscMPIInt size; 3175 3176 PetscFunctionBegin; 3177 /* Create the factorization matrix */ 3178 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ)); 3179 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 3180 PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 3181 PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name)); 3182 PetscCall(MatSetUp(B)); 3183 3184 PetscCall(PetscNewLog(B,&mumps)); 3185 if (ftype == MAT_FACTOR_LU) { 3186 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3187 B->factortype = MAT_FACTOR_LU; 3188 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3189 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3190 mumps->sym = 0; 3191 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU])); 3192 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 3193 3194 B->ops->view = MatView_MUMPS; 3195 B->ops->getinfo = MatGetInfo_MUMPS; 3196 3197 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps)); 3198 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS)); 3199 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS)); 3200 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS)); 3201 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS)); 3202 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS)); 3203 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS)); 3204 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS)); 3205 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS)); 3206 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS)); 3207 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS)); 3208 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS)); 3209 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS)); 3210 3211 /* set solvertype */ 3212 PetscCall(PetscFree(B->solvertype)); 3213 PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype)); 3214 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 3215 if (size == 1) { 3216 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3217 B->canuseordering = PETSC_TRUE; 3218 } 3219 B->ops->destroy = MatDestroy_MUMPS; 3220 B->data = (void*)mumps; 3221 3222 *F = B; 3223 mumps->id.job = JOB_NULL; 3224 mumps->ICNTL_pre = NULL; 3225 mumps->CNTL_pre = NULL; 3226 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3227 PetscFunctionReturn(0); 3228 } 3229 3230 /* MatGetFactor for Seq and MPI SELL matrices */ 3231 static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F) 3232 { 3233 Mat B; 3234 Mat_MUMPS *mumps; 3235 PetscBool isSeqSELL; 3236 PetscMPIInt size; 3237 3238 PetscFunctionBegin; 3239 /* Create the factorization matrix */ 3240 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL)); 3241 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 3242 PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 3243 PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name)); 3244 PetscCall(MatSetUp(B)); 3245 3246 PetscCall(PetscNewLog(B,&mumps)); 3247 3248 B->ops->view = MatView_MUMPS; 3249 B->ops->getinfo = MatGetInfo_MUMPS; 3250 3251 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps)); 3252 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS)); 3253 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS)); 3254 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS)); 3255 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS)); 3256 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS)); 3257 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS)); 3258 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS)); 3259 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS)); 3260 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS)); 3261 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS)); 3262 3263 if (ftype == MAT_FACTOR_LU) { 3264 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3265 B->factortype = MAT_FACTOR_LU; 3266 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 3267 else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 3268 mumps->sym = 0; 3269 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU])); 3270 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 3271 3272 /* set solvertype */ 3273 PetscCall(PetscFree(B->solvertype)); 3274 PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype)); 3275 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 3276 if (size == 1) { 3277 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3278 B->canuseordering = PETSC_TRUE; 3279 } 3280 B->ops->destroy = MatDestroy_MUMPS; 3281 B->data = (void*)mumps; 3282 3283 *F = B; 3284 mumps->id.job = JOB_NULL; 3285 mumps->ICNTL_pre = NULL; 3286 mumps->CNTL_pre = NULL; 3287 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3288 PetscFunctionReturn(0); 3289 } 3290 3291 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 3292 { 3293 PetscFunctionBegin; 3294 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps)); 3295 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps)); 3296 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps)); 3297 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps)); 3298 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps)); 3299 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps)); 3300 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps)); 3301 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps)); 3302 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps)); 3303 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps)); 3304 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps)); 3305 PetscFunctionReturn(0); 3306 } 3307