/*
   Provides an interface to the MUMPS sparse solver
*/
#include <petscpkg_version.h>
#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sell/mpi/mpisell.h>

EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#include <cmumps_c.h>
#else
#include <zmumps_c.h>
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#include <smumps_c.h>
#else
#include <dmumps_c.h>
#endif
#endif
EXTERN_C_END
#define JOB_INIT         -1
#define JOB_NULL         0
#define JOB_FACTSYMBOLIC 1
#define JOB_FACTNUMERIC  2
#define JOB_SOLVE        3
#define JOB_END          -2

/* calls to MUMPS */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c cmumps_c
#else
#define MUMPS_c zmumps_c
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c smumps_c
#else
#define MUMPS_c dmumps_c
#endif
#endif

/* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
   number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
   naming convention in PetscMPIInt, PetscBLASInt etc.
*/
typedef MUMPS_INT PetscMUMPSInt;

#if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
#if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
#error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
#endif
#else
#if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */
#error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
#endif
#endif

#define MPIU_MUMPSINT       MPI_INT
#define PETSC_MUMPS_INT_MAX 2147483647
#define PETSC_MUMPS_INT_MIN -2147483648

/* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers */
static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a,PetscMUMPSInt *b)
{
  PetscFunctionBegin;
#if PetscDefined(USE_64BIT_INDICES)
  PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
#endif
  *b = (PetscMUMPSInt)(a);
  PetscFunctionReturn(0);
}

/* Put these utility routines here since they are only used in this file */
static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject,const char opt[],const char text[],const char man[],PetscMUMPSInt currentvalue,PetscMUMPSInt *value,PetscBool *set,PetscMUMPSInt lb,PetscMUMPSInt ub)
{
  PetscInt  myval;
  PetscBool myset;
  PetscFunctionBegin;
  /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
  PetscCall(PetscOptionsInt_Private(PetscOptionsObject,opt,text,man,(PetscInt)currentvalue,&myval,&myset,lb,ub));
  if (myset) PetscCall(PetscMUMPSIntCast(myval,value));
  if (set) *set = myset;
  PetscFunctionReturn(0);
}
#define PetscOptionsMUMPSInt(a,b,c,d,e,f) PetscOptionsMUMPSInt_Private(PetscOptionsObject,a,b,c,d,e,f,PETSC_MUMPS_INT_MIN,PETSC_MUMPS_INT_MAX)

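/* For illustration only: a typical use of the cast helper above is converting a PETSc (possibly 64-bit)
   row count before handing it to MUMPS. With 64-bit PetscInt the cast is range-checked; with 32-bit
   PetscInt it compiles down to a plain assignment.

     PetscMUMPSInt n_mumps;
     PetscCall(PetscMUMPSIntCast(A->rmap->n,&n_mumps)); // errors out if the value overflows MUMPS_INT
*/
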
/* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
#define PetscMUMPS_c(mumps) \
  do { \
    if (mumps->use_petsc_omp_support) { \
      if (mumps->is_omp_master) { \
        PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
        MUMPS_c(&mumps->id); \
        PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
      } \
      PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
      /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \
         to processes, so we only Bcast info[1], an error code and leave others (since they do not have \
         an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \
         omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \
      */ \
      PetscCallMPI(MPI_Bcast(mumps->id.infog, 40,MPIU_MUMPSINT, 0,mumps->omp_comm)); \
      PetscCallMPI(MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,     0,mumps->omp_comm)); \
      PetscCallMPI(MPI_Bcast(mumps->id.info,  1, MPIU_MUMPSINT, 0,mumps->omp_comm)); \
    } else { \
      MUMPS_c(&mumps->id); \
    } \
  } while (0)
#else
#define PetscMUMPS_c(mumps) \
  do { MUMPS_c(&mumps->id); } while (0)
#endif

/* declare MumpsScalar */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MumpsScalar mumps_complex
#else
#define MumpsScalar mumps_double_complex
#endif
#else
#define MumpsScalar PetscScalar
#endif

/* macros s.t. indices match MUMPS documentation */
#define ICNTL(I)  icntl[(I)-1]
#define CNTL(I)   cntl[(I)-1]
#define INFOG(I)  infog[(I)-1]
#define INFO(I)   info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I)  rinfo[(I)-1]

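/* For example, with the macros above mumps->id.ICNTL(7) expands to mumps->id.icntl[6], so code in this
   file can use the 1-based control-parameter numbering used by the MUMPS documentation. */
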
typedef struct Mat_MUMPS Mat_MUMPS;
struct Mat_MUMPS {
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;
#else
  ZMUMPS_STRUC_C id;
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
#else
  DMUMPS_STRUC_C id;
#endif
#endif

  MatStructure   matstruc;
  PetscMPIInt    myid,petsc_size;
  PetscMUMPSInt  *irn,*jcn;           /* the (i,j,v) triplets passed to mumps. */
  PetscScalar    *val,*val_alloc;     /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
  PetscInt64     nnz;                 /* number of nonzeros. The type is called selective 64-bit in mumps */
  PetscMUMPSInt  sym;
  MPI_Comm       mumps_comm;
  PetscMUMPSInt  *ICNTL_pre;
  PetscReal      *CNTL_pre;
  PetscMUMPSInt  ICNTL9_pre;          /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter     scat_rhs,scat_sol;   /* used by MatSolve() */
  PetscMUMPSInt  ICNTL20;             /* use centralized (0) or distributed (10) dense RHS */
  PetscMUMPSInt  lrhs_loc,nloc_rhs,*irhs_loc;
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  PetscInt       *rhs_nrow,max_nrhs;
  PetscMPIInt    *rhs_recvcounts,*rhs_disps;
  PetscScalar    *rhs_loc,*rhs_recvbuf;
#endif
  Vec            b_seq,x_seq;
  PetscInt       ninfo,*info;         /* which INFO to display */
  PetscInt       sizeredrhs;
  PetscScalar    *schur_sol;
  PetscInt       schur_sizesol;
  PetscMUMPSInt  *ia_alloc,*ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
  PetscInt64     cur_ilen,cur_jlen;   /* current len of ia_alloc[], ja_alloc[] */
  PetscErrorCode (*ConvertToTriples)(Mat,PetscInt,MatReuse,Mat_MUMPS*);

  /* stuff used by petsc/mumps OpenMP support */
  PetscBool      use_petsc_omp_support;
  PetscOmpCtrl   omp_ctrl;            /* an OpenMP controller with which blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */
  MPI_Comm       petsc_comm,omp_comm; /* petsc_comm is petsc matrix's comm */
  PetscInt64     *recvcount;          /* a collection of nnz on omp_master */
  PetscMPIInt    tag,omp_comm_size;
  PetscBool      is_omp_master;       /* is this rank the master of omp_comm? */
  MPI_Request    *reqs;
};

/* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
   Here, nrow is the number of rows, ia[] is the row pointer and ja[] is the column indices.
 */
static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps,PetscInt nrow,PetscInt *ia,PetscInt *ja,PetscMUMPSInt **ia_mumps,PetscMUMPSInt **ja_mumps,PetscMUMPSInt *nnz_mumps)
{
  PetscInt nnz = ia[nrow]-1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */

  PetscFunctionBegin;
#if defined(PETSC_USE_64BIT_INDICES)
  {
    PetscInt i;
    if (nrow+1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
      PetscCall(PetscFree(mumps->ia_alloc));
      PetscCall(PetscMalloc1(nrow+1,&mumps->ia_alloc));
      mumps->cur_ilen = nrow+1;
    }
    if (nnz > mumps->cur_jlen) {
      PetscCall(PetscFree(mumps->ja_alloc));
      PetscCall(PetscMalloc1(nnz,&mumps->ja_alloc));
      mumps->cur_jlen = nnz;
    }
    for (i=0; i<nrow+1; i++) PetscCall(PetscMUMPSIntCast(ia[i],&(mumps->ia_alloc[i])));
    for (i=0; i<nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i],&(mumps->ja_alloc[i])));
    *ia_mumps = mumps->ia_alloc;
    *ja_mumps = mumps->ja_alloc;
  }
#else
  *ia_mumps = ia;
  *ja_mumps = ja;
#endif
  PetscCall(PetscMUMPSIntCast(nnz,nnz_mumps));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps)
{
  PetscFunctionBegin;
  PetscCall(PetscFree(mumps->id.listvar_schur));
  PetscCall(PetscFree(mumps->id.redrhs));
  PetscCall(PetscFree(mumps->schur_sol));
  mumps->id.size_schur = 0;
  mumps->id.schur_lld  = 0;
  mumps->id.ICNTL(19)  = 0;
  PetscFunctionReturn(0);
}

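/* In the Schur routines below, mumps->id.redrhs holds the reduced right-hand sides as a column-major
   array with leading dimension id.lredrhs (= id.size_schur) and id.nrhs columns, which is why it can be
   wrapped directly as a size_schur-by-nrhs MATSEQDENSE matrix. */
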
/* solve with rhs in mumps->id.redrhs and return in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS            *mumps=(Mat_MUMPS*)F->data;
  Mat                  S,B,X;
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;

  PetscFunctionBegin;
  PetscCall(MatFactorFactorizeSchurComplement(F));
  PetscCall(MatFactorGetSchurComplement(F,&S,&schurstatus));
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B));
  PetscCall(MatSetType(B,((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  PetscCall(MatBindToCPU(B,S->boundtocpu));
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X));
    PetscCall(MatSetType(X,((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X,S->boundtocpu));
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatMatSolveTranspose(S,B,X));
    } else {
      PetscCall(MatMatSolve(S,B,X));
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    sizesol = mumps->id.nrhs*mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      PetscCall(PetscFree(mumps->schur_sol));
      PetscCall(PetscMalloc1(sizesol,&mumps->schur_sol));
      mumps->schur_sizesol = sizesol;
    }
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X));
    PetscCall(MatSetType(X,((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X,S->boundtocpu));
#endif
    PetscCall(MatProductCreateWithMat(S,B,NULL,X));
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatProductSetType(X,MATPRODUCT_AtB));
    } else {
      PetscCall(MatProductSetType(X,MATPRODUCT_AB));
    }
    PetscCall(MatProductSetFromOptions(X));
    PetscCall(MatProductSymbolic(X));
    PetscCall(MatProductNumeric(X));

    PetscCall(MatCopy(X,B,SAME_NONZERO_PATTERN));
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %d",F->schur_status);
  }
  PetscCall(MatFactorRestoreSchurComplement(F,&S,schurstatus));
  PetscCall(MatDestroy(&B));
  PetscCall(MatDestroy(&X));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS *mumps=(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
    PetscFunctionReturn(0);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      PetscCall(PetscFree(mumps->id.redrhs));
      mumps->id.lredrhs = mumps->id.size_schur;
      PetscCall(PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs));
      mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs;
    }
    mumps->id.ICNTL(26) = 1; /* condensation phase */
  } else { /* prepare for the expansion step */
    /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
    PetscCall(MatMumpsSolveSchur_Private(F));
    mumps->id.ICNTL(26) = 2; /* expansion phase */
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
    if (mumps->id.nrhs > 1) {
      PetscCall(PetscFree(mumps->id.redrhs));
      mumps->id.lredrhs = 0;
      mumps->sizeredrhs = 0;
    }
  }
  PetscFunctionReturn(0);
}

/*
  MatConvertToTriples_A_B - convert PETSc matrix to triples: row[nz], col[nz], val[nz]

  input:
    A       - matrix in aij, baij or sbaij format
    shift   - 0: C style output triple; 1: Fortran style output triple.
    reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
              MAT_REUSE_MATRIX:   only the values in v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and col index, matrix values (matrix triples)

  The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
  freed with PetscFree(mumps->irn). This is not ideal code: the fact that v is ONLY sometimes part of mumps->irn means
  that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
 */

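/* For illustration (not part of the conversion contract above): with shift = 1, the 2x2 AIJ matrix
     [ 10  0 ]
     [  3  4 ]
   is reported as nnz = 3 with r = {1,2,2}, c = {1,1,2}, v = {10,3,4}, i.e. 1-based COO triplets in the
   row-by-row order in which the nonzeros are stored in the AIJ arrays. */
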
PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscScalar *av;
  const PetscInt    *ai,*aj,*ajj,M=A->rmap->n;
  PetscInt64        nz,rnz,i,j,k;
  PetscMUMPSInt     *row,*col;
  Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArrayRead(A,&av));
  mumps->val = (PetscScalar*)av;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = aa->nz;
    ai = aa->i;
    aj = aa->j;
    PetscCall(PetscMalloc2(nz,&row,nz,&col));
    for (i=k=0; i<M; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        PetscCall(PetscMUMPSIntCast(i+shift,&row[k]));
        PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[k]));
        k++;
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(A,&av));
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  PetscInt64    nz,i,j,k,r;
  Mat_SeqSELL   *a=(Mat_SeqSELL*)A->data;
  PetscMUMPSInt *row,*col;

  PetscFunctionBegin;
  mumps->val = a->val;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = a->sliidx[a->totalslices];
    PetscCall(PetscMalloc2(nz,&row,nz,&col));
    for (i=k=0; i<a->totalslices; i++) {
      for (j=a->sliidx[i],r=0; j<a->sliidx[i+1]; j++,r=((r+1)&0x07)) {
        PetscCall(PetscMUMPSIntCast(8*i+r+shift,&row[k++]));
      }
    }
    for (i=0; i<nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i]+shift,&col[i]));
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  Mat_SeqBAIJ    *aa=(Mat_SeqBAIJ*)A->data;
  const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2;
  PetscInt64     M,nz,idx=0,rnz,i,j,k,m;
  PetscInt       bs;
  PetscMUMPSInt  *row,*col;

  PetscFunctionBegin;
  PetscCall(MatGetBlockSize(A,&bs));
  M = A->rmap->N/bs;
  mumps->val = aa->a;
  if (reuse == MAT_INITIAL_MATRIX) {
    ai = aa->i; aj = aa->j;
    nz = bs2*aa->nz;
    PetscCall(PetscMalloc2(nz,&row,nz,&col));
    for (i=0; i<M; i++) {
      ajj = aj + ai[i];
      rnz = ai[i+1] - ai[i];
      for (k=0; k<rnz; k++) {
        for (j=0; j<bs; j++) {
          for (m=0; m<bs; m++) {
            PetscCall(PetscMUMPSIntCast(i*bs + m + shift,&row[idx]));
            PetscCall(PetscMUMPSIntCast(bs*ajj[k] + j + shift,&col[idx]));
            idx++;
          }
        }
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt *ai,*aj,*ajj;
  PetscInt       bs;
  PetscInt64     nz,rnz,i,j,k,m;
  PetscMUMPSInt  *row,*col;
  PetscScalar    *val;
  Mat_SeqSBAIJ   *aa=(Mat_SeqSBAIJ*)A->data;
  const PetscInt bs2=aa->bs2,mbs=aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool      isset,hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A,&isset,&hermitian));
  PetscCheck(!isset || !hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  ai = aa->i;
  aj = aa->j;
  PetscCall(MatGetBlockSize(A,&bs));
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = aa->nz;
    PetscCall(PetscMalloc2(bs2*nz,&row,bs2*nz,&col));
    if (bs>1) {
      PetscCall(PetscMalloc1(bs2*nz,&mumps->val_alloc));
      mumps->val = mumps->val_alloc;
    } else {
      mumps->val = aa->a;
    }
    mumps->irn = row;
    mumps->jcn = col;
  } else {
    if (bs == 1) mumps->val = aa->a;
    row = mumps->irn;
    col = mumps->jcn;
  }
  val = mumps->val;

  nz = 0;
  if (bs>1) {
    for (i=0; i<mbs; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            if (ajj[j]>i || k>=m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                PetscCall(PetscMUMPSIntCast(i*bs + m + shift,&row[nz]));
                PetscCall(PetscMUMPSIntCast(ajj[j]*bs + k + shift,&col[nz]));
              }
              val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs];
            }
          }
        }
      }
    }
  } else if (reuse == MAT_INITIAL_MATRIX) {
    for (i=0; i<mbs; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
        PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[nz]));
        nz++;
      }
    }
    PetscCheck(nz == aa->nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT,nz,aa->nz);
  }
  if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
  PetscFunctionReturn(0);
}

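/* MatConvertToTriples_seqaij_seqsbaij() below feeds a Cholesky (symmetric) factorization, so it emits
   only the upper triangular part of A (column index >= row index), using aa->diag to locate where the
   upper triangle of each row starts and falling back to a column-index test for rows with a missing
   diagonal entry. */
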
PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai,*aj,*ajj,*adiag,M=A->rmap->n;
  PetscInt64        nz,rnz,i,j;
  const PetscScalar *av,*v1;
  PetscScalar       *val;
  PetscMUMPSInt     *row,*col;
  Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
  PetscBool         missing;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian,isset;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A,&isset,&hermitian));
  PetscCheck(!isset || !hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatSeqAIJGetArrayRead(A,&av));
  ai = aa->i; aj = aa->j;
  adiag = aa->diag;
  PetscCall(MatMissingDiagonal_SeqAIJ(A,&missing,NULL));
  if (reuse == MAT_INITIAL_MATRIX) {
    /* count nz in the upper triangular part of A */
    nz = 0;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i]; j<ai[i+1]; j++) {
            if (aj[j] < i) continue;
            nz++;
          }
        } else {
          nz += ai[i+1] - adiag[i];
        }
      }
    } else {
      for (i=0; i<M; i++) nz += ai[i+1] - adiag[i];
    }
    PetscCall(PetscMalloc2(nz,&row,nz,&col));
    PetscCall(PetscMalloc1(nz,&val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;

    nz = 0;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i]; j<ai[i+1]; j++) {
            if (aj[j] < i) continue;
            PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
            PetscCall(PetscMUMPSIntCast(aj[j]+shift,&col[nz]));
            val[nz] = av[j];
            nz++;
          }
        } else {
          rnz = ai[i+1] - adiag[i];
          ajj = aj + adiag[i];
          v1  = av + adiag[i];
          for (j=0; j<rnz; j++) {
            PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
            PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[nz]));
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i=0; i<M; i++) {
        rnz = ai[i+1] - adiag[i];
        ajj = aj + adiag[i];
        v1  = av + adiag[i];
        for (j=0; j<rnz; j++) {
          PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
          PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[nz]));
          val[nz++] = v1[j];
        }
      }
    }
  } else {
    nz  = 0;
    val = mumps->val;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i]; j<ai[i+1]; j++) {
            if (aj[j] < i) continue;
            val[nz++] = av[j];
          }
        } else {
          rnz = ai[i+1] - adiag[i];
          v1  = av + adiag[i];
          for (j=0; j<rnz; j++) {
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i=0; i<M; i++) {
        rnz = ai[i+1] - adiag[i];
        v1  = av + adiag[i];
        for (j=0; j<rnz; j++) {
          val[nz++] = v1[j];
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(A,&av));
  PetscFunctionReturn(0);
}

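/* In the MPI converters below, the "A-part" is the diagonal block of the parallel matrix (local rows and
   local columns) and the "B-part" is the off-diagonal block; garray[] maps the B-part's compressed local
   column indices back to global column indices, so the emitted triplets are always in global numbering. */
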
PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai,*aj,*bi,*bj,*garray,*ajj,*bjj;
  PetscInt          bs;
  PetscInt64        rstart,nz,i,j,k,m,jj,irow,countA,countB;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av,*bv,*v1,*v2;
  PetscScalar       *val;
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ*)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
  const PetscInt    bs2=aa->bs2,mbs=aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian,isset;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A,&isset,&hermitian));
  PetscCheck(!isset || !hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatGetBlockSize(A,&bs));
  rstart = A->rmap->rstart;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;
  av = aa->a;
  bv = bb->a;

  garray = mat->garray;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz = (aa->nz+bb->nz)*bs2; /* just a conservative estimate */
    PetscCall(PetscMalloc2(nz,&row,nz,&col));
    PetscCall(PetscMalloc1(nz,&val));
    /* cannot decide the exact mumps->nnz now because of the SBAIJ */
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<mbs; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i]*bs2;
    v2     = bv + bi[i]*bs2;

    if (bs>1) {
      /* A-part */
      for (j=0; j<countA; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            if (rstart + ajj[j]*bs>irow || k>=m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                PetscCall(PetscMUMPSIntCast(irow + m + shift,&row[jj]));
                PetscCall(PetscMUMPSIntCast(rstart + ajj[j]*bs + k + shift,&col[jj]));
              }
              val[jj++] = v1[j*bs2 + m + k*bs];
            }
          }
        }
      }

      /* B-part */
      for (j=0; j < countB; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            if (reuse == MAT_INITIAL_MATRIX) {
              PetscCall(PetscMUMPSIntCast(irow + m + shift,&row[jj]));
              PetscCall(PetscMUMPSIntCast(garray[bjj[j]]*bs + k + shift,&col[jj]));
            }
            val[jj++] = v2[j*bs2 + m + k*bs];
          }
        }
      }
    } else {
      /* A-part */
      for (j=0; j<countA; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
          PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]));
        }
        val[jj++] = v1[j];
      }

      /* B-part */
      for (j=0; j < countB; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
          PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]));
        }
        val[jj++] = v2[j];
      }
    }
    irow += bs;
  }
  mumps->nnz = jj;
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai,*aj,*bi,*bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscInt64        rstart,nz,i,j,jj,irow,countA,countB;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av,*bv,*v1,*v2;
  PetscScalar       *val;
  Mat               Ad,Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray));
  PetscCall(MatSeqAIJGetArrayRead(Ad,&av));
  PetscCall(MatSeqAIJGetArrayRead(Ao,&bv));

  aa = (Mat_SeqAIJ*)(Ad)->data;
  bb = (Mat_SeqAIJ*)(Ao)->data;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
    PetscCall(PetscMalloc2(nz,&row,nz,&col));
    PetscCall(PetscMalloc1(nz,&val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
        PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]));
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j=0; j < countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
        PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]));
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Ad,&av));
  PetscCall(MatSeqAIJRestoreArrayRead(Ao,&bv));
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ*)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
  const PetscInt    *ai = aa->i,*bi = bb->i,*aj = aa->j,*bj = bb->j,*ajj,*bjj;
  const PetscInt    *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart;
  const PetscInt    bs2=mat->bs2;
  PetscInt          bs;
  PetscInt64        nz,i,j,k,n,jj,irow,countA,countB,idx;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av=aa->a,*bv=bb->a,*v1,*v2;
  PetscScalar       *val;

  PetscFunctionBegin;
  PetscCall(MatGetBlockSize(A,&bs));
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = bs2*(aa->nz + bb->nz);
    PetscCall(PetscMalloc2(nz,&row,nz,&col));
    PetscCall(PetscMalloc1(nz,&val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<mbs; i++) {
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    ajj    = aj + ai[i];
    bjj    = bj + bi[i];
    v1     = av + bs2*ai[i];
    v2     = bv + bs2*bi[i];

    idx = 0;
    /* A-part */
    for (k=0; k<countA; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift,&row[jj]));
            PetscCall(PetscMUMPSIntCast(rstart + bs*ajj[k] + j + shift,&col[jj]));
          }
          val[jj++] = v1[idx++];
        }
      }
    }

    idx = 0;
    /* B-part */
    for (k=0; k<countB; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift,&row[jj]));
            PetscCall(PetscMUMPSIntCast(bs*garray[bjj[k]] + j + shift,&col[jj]));
          }
          val[jj++] = v2[idx++];
        }
      }
    }
    irow += bs;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai,*aj,*adiag,*bi,*bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscInt64        rstart,nz,nza,nzb,i,j,jj,irow,countA,countB;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av,*bv,*v1,*v2;
  PetscScalar       *val;
  Mat               Ad,Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian,isset;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A,&isset,&hermitian));
  PetscCheck(!isset || !hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray));
  PetscCall(MatSeqAIJGetArrayRead(Ad,&av));
  PetscCall(MatSeqAIJGetArrayRead(Ao,&bv));

  aa = (Mat_SeqAIJ*)(Ad)->data;
  bb = (Mat_SeqAIJ*)(Ao)->data;
  ai    = aa->i;
  aj    = aa->j;
  adiag = aa->diag;
  bi    = bb->i;
  bj    = bb->j;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
    nzb = 0; /* num of upper triangular entries in mat->B */
    for (i=0; i<m; i++) {
      nza   += (ai[i+1] - adiag[i]);
      countB = bi[i+1] - bi[i];
      bjj    = bj + bi[i];
      for (j=0; j<countB; j++) {
        if (garray[bjj[j]] > rstart) nzb++;
      }
    }

    nz = nza + nzb; /* total nz of upper triangular part of mat */
    PetscCall(PetscMalloc2(nz,&row,nz,&col));
    PetscCall(PetscMalloc1(nz,&val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
    v1     = av + adiag[i];
    countA = ai[i+1] - adiag[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
        PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]));
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j=0; j < countB; j++) {
      if (garray[bjj[j]] > rstart) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
          PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]));
        }
        val[jj++] = v2[j];
      }
    }
    irow++;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Ad,&av));
  PetscCall(MatSeqAIJRestoreArrayRead(Ao,&bv));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS *mumps=(Mat_MUMPS*)A->data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc));
  PetscCall(VecScatterDestroy(&mumps->scat_rhs));
  PetscCall(VecScatterDestroy(&mumps->scat_sol));
  PetscCall(VecDestroy(&mumps->b_seq));
  PetscCall(VecDestroy(&mumps->x_seq));
  PetscCall(PetscFree(mumps->id.perm_in));
  PetscCall(PetscFree2(mumps->irn,mumps->jcn));
  PetscCall(PetscFree(mumps->val_alloc));
  PetscCall(PetscFree(mumps->info));
  PetscCall(PetscFree(mumps->ICNTL_pre));
  PetscCall(PetscFree(mumps->CNTL_pre));
  PetscCall(MatMumpsResetSchur_Private(mumps));
  if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
    mumps->id.job = JOB_END;
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d",mumps->id.INFOG(1));
    if (mumps->mumps_comm != MPI_COMM_NULL) {
      if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
      else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A),&mumps->mumps_comm));
    }
  }
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  if (mumps->use_petsc_omp_support) {
    PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
    PetscCall(PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf));
    PetscCall(PetscFree3(mumps->rhs_nrow,mumps->rhs_recvcounts,mumps->rhs_disps));
  }
#endif
  PetscCall(PetscFree(mumps->ia_alloc));
  PetscCall(PetscFree(mumps->ja_alloc));
  PetscCall(PetscFree(mumps->recvcount));
  PetscCall(PetscFree(mumps->reqs));
  PetscCall(PetscFree(mumps->irhs_loc));
  PetscCall(PetscFree(A->data));

  /* clear composed functions */
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL));
  PetscFunctionReturn(0);
}

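/* For the distributed dense RHS path (ICNTL(20)=10), MUMPS expects each participating rank to provide
   its local RHS entries in rhs_loc[] together with irhs_loc[], the 1-based global row index of each
   local entry; the routine below builds exactly that mapping from the matrix's row ownership ranges. */
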
/* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */
static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A,PetscInt nrhs,const PetscScalar *array)
{
  Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
  const PetscMPIInt ompsize=mumps->omp_comm_size;
  PetscInt          i,m,M,rstart;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A,&M,NULL));
  PetscCall(MatGetLocalSize(A,&m,NULL));
  PetscCheck(M <= PETSC_MUMPS_INT_MAX,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
  if (ompsize == 1) {
    if (!mumps->irhs_loc) {
      mumps->nloc_rhs = m;
      PetscCall(PetscMalloc1(m,&mumps->irhs_loc));
      PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
      for (i=0; i<m; i++) mumps->irhs_loc[i] = rstart+i+1; /* use 1-based indices */
    }
    mumps->id.rhs_loc = (MumpsScalar*)array;
  } else {
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
    const PetscInt *ranges;
    PetscMPIInt    j,k,sendcount,*petsc_ranks,*omp_ranks;
    MPI_Group      petsc_group,omp_group;
    PetscScalar    *recvbuf=NULL;

    if (mumps->is_omp_master) {
      /* Lazily initialize the omp stuff for distributed rhs */
      if (!mumps->irhs_loc) {
        PetscCall(PetscMalloc2(ompsize,&omp_ranks,ompsize,&petsc_ranks));
        PetscCall(PetscMalloc3(ompsize,&mumps->rhs_nrow,ompsize,&mumps->rhs_recvcounts,ompsize,&mumps->rhs_disps));
        PetscCallMPI(MPI_Comm_group(mumps->petsc_comm,&petsc_group));
        PetscCallMPI(MPI_Comm_group(mumps->omp_comm,&omp_group));
        for (j=0; j<ompsize; j++) omp_ranks[j] = j;
        PetscCallMPI(MPI_Group_translate_ranks(omp_group,ompsize,omp_ranks,petsc_group,petsc_ranks));

        /* Populate mumps->irhs_loc[], rhs_nrow[] */
        mumps->nloc_rhs = 0;
        PetscCall(MatGetOwnershipRanges(A,&ranges));
        for (j=0; j<ompsize; j++) {
          mumps->rhs_nrow[j] = ranges[petsc_ranks[j]+1] - ranges[petsc_ranks[j]];
          mumps->nloc_rhs   += mumps->rhs_nrow[j];
        }
        PetscCall(PetscMalloc1(mumps->nloc_rhs,&mumps->irhs_loc));
        for (j=k=0; j<ompsize; j++) {
          for (i=ranges[petsc_ranks[j]]; i<ranges[petsc_ranks[j]+1]; i++,k++) mumps->irhs_loc[k] = i+1; /* uses 1-based indices */
        }

        PetscCall(PetscFree2(omp_ranks,petsc_ranks));
        PetscCallMPI(MPI_Group_free(&petsc_group));
        PetscCallMPI(MPI_Group_free(&omp_group));
      }

      /* Realloc buffers when current nrhs is bigger than what we have met */
      if (nrhs > mumps->max_nrhs) {
        PetscCall(PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf));
        PetscCall(PetscMalloc2(mumps->nloc_rhs*nrhs,&mumps->rhs_loc,mumps->nloc_rhs*nrhs,&mumps->rhs_recvbuf));
        mumps->max_nrhs = nrhs;
      }

      /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
      for (j=0; j<ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j]*nrhs,&mumps->rhs_recvcounts[j]));
      mumps->rhs_disps[0] = 0;
      for (j=1; j<ompsize; j++) {
        mumps->rhs_disps[j] = mumps->rhs_disps[j-1] + mumps->rhs_recvcounts[j-1];
        PetscCheck(mumps->rhs_disps[j] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscMPIInt overflow!");
      }
      recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
    }

    PetscCall(PetscMPIIntCast(m*nrhs,&sendcount));
    PetscCallMPI(MPI_Gatherv(array,sendcount,MPIU_SCALAR,recvbuf,mumps->rhs_recvcounts,mumps->rhs_disps,MPIU_SCALAR,0,mumps->omp_comm));

    if (mumps->is_omp_master) {
      if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
        PetscScalar *dst,*dstbase = mumps->rhs_loc;
        for (j=0; j<ompsize; j++) {
          const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
          dst = dstbase;
          for (i=0; i<nrhs; i++) {
            PetscCall(PetscArraycpy(dst,src,mumps->rhs_nrow[j]));
            src += mumps->rhs_nrow[j];
            dst += mumps->nloc_rhs;
          }
          dstbase += mumps->rhs_nrow[j];
        }
      }
      mumps->id.rhs_loc = (MumpsScalar*)mumps->rhs_loc;
    }
#endif /* PETSC_HAVE_OPENMP_SUPPORT */
  }
  mumps->id.nrhs     = nrhs;
  mumps->id.nloc_rhs = mumps->nloc_rhs;
  mumps->id.lrhs_loc = mumps->nloc_rhs;
  mumps->id.irhs_loc = mumps->irhs_loc;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
  const PetscScalar *rarray = NULL;
  PetscScalar       *array;
  IS                is_iden,is_petsc;
  PetscInt          i;
  PetscBool         second_solve = PETSC_FALSE;
  static PetscBool  cite1 = PETSC_FALSE,cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1));
  PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",&cite2));

  if (A->factorerrortype) {
    PetscCall(PetscInfo(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
    PetscCall(VecSetInf(x));
    PetscFunctionReturn(0);
  }

  mumps->id.nrhs = 1;
  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      PetscCall(VecGetArrayRead(b,&rarray));
      PetscCall(MatMumpsSetUpDistRHSInfo(A,1,rarray));
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS; scatter b into a sequential rhs vector */
      PetscCall(VecScatterBegin(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD));
      if (!mumps->myid) {
        PetscCall(VecGetArray(mumps->b_seq,&array));
        mumps->id.rhs = (MumpsScalar*)array;
      }
    }
  } else { /* petsc_size == 1 */
    mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
    PetscCall(VecCopy(b,x));
    PetscCall(VecGetArray(x,&array));
    mumps->id.rhs = (MumpsScalar*)array;
  }

  /*
     Handle the condensation step of the Schur complement (if any).
     We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
     According to the MUMPS (5.0.0) manual, any value should be harmless during the factorization phase.
     Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
     This requires an extra call to PetscMUMPS_c and the computation of the factors for S.
  */
  if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
    PetscCheck(mumps->petsc_size <= 1,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc");
    second_solve = PETSC_TRUE;
    PetscCall(MatMumpsHandleSchur_Private(A,PETSC_FALSE));
  }
  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));

  /* handle expansion step of Schur complement (if any) */
  if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A,PETSC_TRUE));

  if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
    if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
      /* when id.ICNTL(9) changes, the contents of isol_loc may change (not its size, lsol_loc), so recreate scat_sol */
      PetscCall(VecScatterDestroy(&mumps->scat_sol));
    }
    if (!mumps->scat_sol) { /* create scatter scat_sol */
      PetscInt *isol2_loc=NULL;
      PetscCall(ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden)); /* from */
      PetscCall(PetscMalloc1(mumps->id.lsol_loc,&isol2_loc));
      for (i=0; i<mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i]-1; /* change Fortran style to C style */
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,isol2_loc,PETSC_OWN_POINTER,&is_petsc)); /* to */
      PetscCall(VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol));
      PetscCall(ISDestroy(&is_iden));
      PetscCall(ISDestroy(&is_petsc));
      mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
    }

    PetscCall(VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD));
  }

  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      PetscCall(VecRestoreArrayRead(b,&rarray));
    } else if (!mumps->myid) {
      PetscCall(VecRestoreArray(mumps->b_seq,&array));
    }
  } else PetscCall(VecRestoreArray(x,&array));

  PetscCall(PetscLogFlops(2.0*mumps->id.RINFO(3)));
  PetscFunctionReturn(0);
}

PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS *mumps=(Mat_MUMPS*)A->data;

  PetscFunctionBegin;
  mumps->id.ICNTL(9) = 0;
  PetscCall(MatSolve_MUMPS(A,b,x));
  mumps->id.ICNTL(9) = 1;
  PetscFunctionReturn(0);
}

PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X)
{
  Mat               Bt = NULL;
  PetscBool         denseX,denseB,flg,flgT;
  Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
  PetscInt          i,nrhs,M;
  PetscScalar       *array;
  const PetscScalar *rbray;
  PetscInt          lsol_loc,nlsol_loc,*idxx,iidx = 0;
  PetscMUMPSInt     *isol_loc,*isol_loc_save;
  PetscScalar       *bray,*sol_loc,*sol_loc_save;
  IS                is_to,is_from;
  PetscInt          k,proc,j,m,myrstart;
  const PetscInt    *rstart;
  Vec               v_mpi,msol_loc;
  VecScatter        scat_sol;
  Vec               b_seq;
  VecScatter        scat_rhs;
  PetscScalar       *aa;
  PetscInt          spnr,*ia,*ja;
  Mat_MPIAIJ        *b = NULL;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)X,&denseX,MATSEQDENSE,MATMPIDENSE,NULL));
  PetscCheck(denseX,PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");

  PetscCall(PetscObjectTypeCompareAny((PetscObject)B,&denseB,MATSEQDENSE,MATMPIDENSE,NULL));
  if (denseB) {
    PetscCheck(B->rmap->n == X->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution");
    mumps->id.ICNTL(20) = 0; /* dense RHS */
  } else { /* sparse B */
    PetscCheck(X != B,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_IDN,"X and B must be different matrices");
    PetscCall(PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT));
    if (flgT) { /* input B is the transpose of the actual RHS matrix,
                   because MUMPS requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
      PetscCall(MatTransposeGetMat(B,&Bt));
    } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix");
    mumps->id.ICNTL(20) = 1; /* sparse RHS */
  }

  PetscCall(MatGetSize(B,&M,&nrhs));
  mumps->id.nrhs = nrhs;
  mumps->id.lrhs = M;
  mumps->id.rhs  = NULL;

  if (mumps->petsc_size == 1) {
    PetscScalar *aa;
    PetscInt    spnr,*ia,*ja;
    PetscBool   second_solve = PETSC_FALSE;

    PetscCall(MatDenseGetArray(X,&array));
    mumps->id.rhs = (MumpsScalar*)array;

    if (denseB) {
      /* copy B to X */
      PetscCall(MatDenseGetArrayRead(B,&rbray));
      PetscCall(PetscArraycpy(array,rbray,M*nrhs));
      PetscCall(MatDenseRestoreArrayRead(B,&rbray));
    } else { /* sparse B */
      PetscCall(MatSeqAIJGetArray(Bt,&aa));
      PetscCall(MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
      PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs));
      mumps->id.rhs_sparse = (MumpsScalar*)aa;
    }
    /* handle condensation step of Schur complement (if any) */
    if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
      second_solve = PETSC_TRUE;
      PetscCall(MatMumpsHandleSchur_Private(A,PETSC_FALSE));
    }
    /* solve phase */
    /*-------------*/
    mumps->id.job = JOB_SOLVE;
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));

    /* handle expansion step of Schur complement (if any) */
    if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A,PETSC_TRUE));
    if (!denseB) { /* sparse B */
      PetscCall(MatSeqAIJRestoreArray(Bt,&aa));
      PetscCall(MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
      PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
    PetscCall(MatDenseRestoreArray(X,&array));
    PetscFunctionReturn(0);
  }

  /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/
  PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19),PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc");

  /* create msol_loc to hold mumps local solution */
  isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
  sol_loc_save  = (PetscScalar*)mumps->id.sol_loc;

  lsol_loc  = mumps->id.lsol_loc;
  nlsol_loc = nrhs*lsol_loc; /* length of sol_loc */
  PetscCall(PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc));
  mumps->id.sol_loc  = (MumpsScalar*)sol_loc;
  mumps->id.isol_loc = isol_loc;

  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc));

  if (denseB) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      PetscCall(MatDenseGetArrayRead(B,&rbray));
      PetscCall(MatMumpsSetUpDistRHSInfo(A,nrhs,rbray));
      PetscCall(MatDenseRestoreArrayRead(B,&rbray));
      PetscCall(MatGetLocalSize(B,&m,NULL));
      PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,NULL,&v_mpi));
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
      /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in
         very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank
         0, re-arrange B into the desired order, which is a local operation.
       */

      /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
      /* wrap dense rhs matrix B into a vector v_mpi */
      PetscCall(MatGetLocalSize(B,&m,NULL));
      PetscCall(MatDenseGetArray(B,&bray));
      PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi));
      PetscCall(MatDenseRestoreArray(B,&bray));

      /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
      if (!mumps->myid) {
        PetscInt *idx;
        /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
        PetscCall(PetscMalloc1(nrhs*M,&idx));
        PetscCall(MatGetOwnershipRanges(B,&rstart));
        k = 0;
        for (proc=0; proc<mumps->petsc_size; proc++) {
          for (j=0; j<nrhs; j++) {
            for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i;
          }
        }

        PetscCall(VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq));
        PetscCall(ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to));
        PetscCall(ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from));
      } else {
        PetscCall(VecCreateSeq(PETSC_COMM_SELF,0,&b_seq));
        PetscCall(ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to));
        PetscCall(ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from));
      }
      PetscCall(VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs));
      PetscCall(VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(ISDestroy(&is_to));
      PetscCall(ISDestroy(&is_from));
      PetscCall(VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD));

      if (!mumps->myid) { /* define rhs on the host */
        PetscCall(VecGetArray(b_seq,&bray));
        mumps->id.rhs = (MumpsScalar*)bray;
        PetscCall(VecRestoreArray(b_seq,&bray));
      }
    }
  } else { /* sparse B */
    b = (Mat_MPIAIJ*)Bt->data;

    /* wrap dense X into a vector v_mpi */
    PetscCall(MatGetLocalSize(X,&m,NULL));
    PetscCall(MatDenseGetArray(X,&bray));
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi));
    PetscCall(MatDenseRestoreArray(X,&bray));

    if (!mumps->myid) {
      PetscCall(MatSeqAIJGetArray(b->A,&aa));
      PetscCall(MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
      PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs));
      mumps->id.rhs_sparse = (MumpsScalar*)aa;
    } else {
      mumps->id.irhs_ptr    = NULL;
      mumps->id.irhs_sparse = NULL;
      mumps->id.nz_rhs      = 0;
      mumps->id.rhs_sparse  = NULL;
    }
  }

  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));

  /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
  PetscCall(MatDenseGetArray(X,&array));
  PetscCall(VecPlaceArray(v_mpi,array));

  /* create scatter scat_sol */
  PetscCall(MatGetOwnershipRanges(X,&rstart));
  /* iidx: index for scatter mumps solution to petsc X */

  PetscCall(ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from));
  PetscCall(PetscMalloc1(nlsol_loc,&idxx));
  for (i=0; i<lsol_loc; i++) {
    isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in the j-th vector */

    for (proc=0; proc<mumps->petsc_size; proc++) {
      if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) {
        myrstart = rstart[proc];
        k        = isol_loc[i] - myrstart;        /* local index on 1st column of petsc vector X */
        iidx     = k + myrstart*nrhs;             /* maps mumps isol_loc[i] to petsc index in X */
        m        = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */
        break;
      }
    }

    for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m;
  }
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to));
  PetscCall(VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol));
  PetscCall(VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall(ISDestroy(&is_from));
  PetscCall(ISDestroy(&is_to));
  PetscCall(VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall(MatDenseRestoreArray(X,&array));

  /* free spaces */
  mumps->id.sol_loc  = (MumpsScalar*)sol_loc_save;
  mumps->id.isol_loc = isol_loc_save;

  PetscCall(PetscFree2(sol_loc,isol_loc));
  PetscCall(PetscFree(idxx));
  PetscCall(VecDestroy(&msol_loc));
  PetscCall(VecDestroy(&v_mpi));
  if (!denseB) {
    if (!mumps->myid) {
      b = (Mat_MPIAIJ*)Bt->data;
      PetscCall(MatSeqAIJRestoreArray(b->A,&aa));
      PetscCall(MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
      PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
  } else {
    if (mumps->ICNTL20 == 0) {
      PetscCall(VecDestroy(&b_seq));
      PetscCall(VecScatterDestroy(&scat_rhs));
    }
  }
  PetscCall(VecScatterDestroy(&scat_sol));
  PetscCall(PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3)));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X)
{
  PetscBool flg;
  Mat       B;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL));
  PetscCheck(flg,PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix");

  /* Create B=Bt^T that uses Bt's data structure */
  PetscCall(MatCreateTranspose(Bt,&B));

  PetscCall(MatMatSolve_MUMPS(A,B,X));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(0);
}

#if !defined(PETSC_USE_COMPLEX)
/*
  input:
   F:     numeric factor
  output:
   nneg:  total number of negative pivots
   nzero: total number of zero pivots
   npos:  (global dimension of F) - nneg - nzero
*/
PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos)
{
  Mat_MUMPS   *mumps =(Mat_MUMPS*)F->data;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F),&size));
  /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
  PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia",mumps->id.INFOG(13));

  if (nneg) *nneg = mumps->id.INFOG(12);
  if (nzero || npos) {
    PetscCheck(mumps->id.ICNTL(24) == 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
    if (nzero) *nzero = mumps->id.INFOG(28);
    if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
  }
  PetscFunctionReturn(0);
}
#endif

PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps)
{
  PetscInt       i,nreqs;
  PetscMUMPSInt  *irn,*jcn;
  PetscMPIInt    count;
  PetscInt64     totnnz,remain;
  const PetscInt osize=mumps->omp_comm_size;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (osize > 1) {
    if (reuse == MAT_INITIAL_MATRIX) {
      /* master first gathers counts of nonzeros to receive */
      if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize,&mumps->recvcount));
      PetscCallMPI(MPI_Gather(&mumps->nnz,1,MPIU_INT64,mumps->recvcount,1,MPIU_INT64,0/*master*/,mumps->omp_comm));

      /* Then each computes number of send/recvs */
      if (mumps->is_omp_master) {
        /* Start from 1 since self communication is not done in MPI */
        nreqs = 0;
        for (i=1; i<osize; i++) nreqs += (mumps->recvcount[i]+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
      } else {
        nreqs = (mumps->nnz+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
      }
      PetscCall(PetscMalloc1(nreqs*3,&mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */

1552 */ 1553 nreqs = 0; /* counter for actual send/recvs */ 1554 if (mumps->is_omp_master) { 1555 for (i=0,totnnz=0; i<osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1556 PetscCall(PetscMalloc2(totnnz,&irn,totnnz,&jcn)); 1557 PetscCall(PetscMalloc1(totnnz,&val)); 1558 1559 /* Self communication */ 1560 PetscCall(PetscArraycpy(irn,mumps->irn,mumps->nnz)); 1561 PetscCall(PetscArraycpy(jcn,mumps->jcn,mumps->nnz)); 1562 PetscCall(PetscArraycpy(val,mumps->val,mumps->nnz)); 1563 1564 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1565 PetscCall(PetscFree2(mumps->irn,mumps->jcn)); 1566 PetscCall(PetscFree(mumps->val_alloc)); 1567 mumps->nnz = totnnz; 1568 mumps->irn = irn; 1569 mumps->jcn = jcn; 1570 mumps->val = mumps->val_alloc = val; 1571 1572 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1573 jcn += mumps->recvcount[0]; 1574 val += mumps->recvcount[0]; 1575 1576 /* Remote communication */ 1577 for (i=1; i<osize; i++) { 1578 count = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX); 1579 remain = mumps->recvcount[i] - count; 1580 while (count>0) { 1581 PetscCallMPI(MPI_Irecv(irn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1582 PetscCallMPI(MPI_Irecv(jcn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1583 PetscCallMPI(MPI_Irecv(val,count,MPIU_SCALAR, i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1584 irn += count; 1585 jcn += count; 1586 val += count; 1587 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1588 remain -= count; 1589 } 1590 } 1591 } else { 1592 irn = mumps->irn; 1593 jcn = mumps->jcn; 1594 val = mumps->val; 1595 count = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX); 1596 remain = mumps->nnz - count; 1597 while (count>0) { 1598 PetscCallMPI(MPI_Isend(irn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1599 PetscCallMPI(MPI_Isend(jcn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1600 PetscCallMPI(MPI_Isend(val,count,MPIU_SCALAR, 0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1601 irn += count; 1602 jcn += count; 1603 val += count; 1604 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1605 remain -= count; 1606 } 1607 } 1608 } else { 1609 nreqs = 0; 1610 if (mumps->is_omp_master) { 1611 val = mumps->val + mumps->recvcount[0]; 1612 for (i=1; i<osize; i++) { /* Remote communication only since self data is already in place */ 1613 count = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX); 1614 remain = mumps->recvcount[i] - count; 1615 while (count>0) { 1616 PetscCallMPI(MPI_Irecv(val,count,MPIU_SCALAR,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1617 val += count; 1618 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1619 remain -= count; 1620 } 1621 } 1622 } else { 1623 val = mumps->val; 1624 count = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX); 1625 remain = mumps->nnz - count; 1626 while (count>0) { 1627 PetscCallMPI(MPI_Isend(val,count,MPIU_SCALAR,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1628 val += count; 1629 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1630 remain -= count; 1631 } 1632 } 1633 } 1634 PetscCallMPI(MPI_Waitall(nreqs,mumps->reqs,MPI_STATUSES_IGNORE)); 1635 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1636 } 1637 PetscFunctionReturn(0); 1638 } 1639 1640 PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info) 1641 { 1642 Mat_MUMPS *mumps =(Mat_MUMPS*)(F)->data; 1643 PetscBool isMPIAIJ; 1644 1645 
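  /* Numeric factorization (JOB=2). A sketch of the flow below: refresh the (i,j,v) triplets from A
     with MAT_REUSE_MATRIX, regather them on the OpenMP master ranks if -mat_mumps_use_omp_threads is
     active, hand the values to MUMPS through id.a (centralized input, ICNTL(18)=0) or id.a_loc
     (distributed input), run the factorization, and (re)allocate the sequential work vector x_seq
     that receives MUMPS' distributed solution in the parallel case. */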
PetscFunctionBegin; 1646 if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) { 1647 if (mumps->id.INFOG(1) == -6) { 1648 PetscCall(PetscInfo(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2))); 1649 } 1650 PetscCall(PetscInfo(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2))); 1651 PetscFunctionReturn(0); 1652 } 1653 1654 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps)); 1655 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps)); 1656 1657 /* numerical factorization phase */ 1658 /*-------------------------------*/ 1659 mumps->id.job = JOB_FACTNUMERIC; 1660 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1661 if (!mumps->myid) { 1662 mumps->id.a = (MumpsScalar*)mumps->val; 1663 } 1664 } else { 1665 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1666 } 1667 PetscMUMPS_c(mumps); 1668 if (mumps->id.INFOG(1) < 0) { 1669 PetscCheck(!A->erroriffailure,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d",mumps->id.INFOG(1),mumps->id.INFO(2)); 1670 if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */ 1671 PetscCall(PetscInfo(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2))); 1672 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1673 } else if (mumps->id.INFOG(1) == -13) { 1674 PetscCall(PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2))); 1675 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1676 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 1677 PetscCall(PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray \n",mumps->id.INFOG(1),mumps->id.INFO(2))); 1678 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1679 } else { 1680 PetscCall(PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2))); 1681 F->factorerrortype = MAT_FACTOR_OTHER; 1682 } 1683 } 1684 PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0,PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d",mumps->id.INFOG(16)); 1685 1686 F->assembled = PETSC_TRUE; 1687 1688 if (F->schur) { /* reset Schur status to unfactored */ 1689 #if defined(PETSC_HAVE_CUDA) 1690 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 1691 #endif 1692 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1693 mumps->id.ICNTL(19) = 2; 1694 PetscCall(MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur)); 1695 } 1696 PetscCall(MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED)); 1697 } 1698 1699 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 1700 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 1701 1702 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 1703 if (mumps->petsc_size > 1) { 1704 PetscInt lsol_loc; 1705 PetscScalar *sol_loc; 1706 1707 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ)); 1708 1709 /* distributed solution; Create x_seq=sol_loc for repeated use */ 1710 if (mumps->x_seq) { 1711 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1712 PetscCall(PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc)); 1713 
PetscCall(VecDestroy(&mumps->x_seq)); 1714 } 1715 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1716 PetscCall(PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc)); 1717 mumps->id.lsol_loc = lsol_loc; 1718 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1719 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq)); 1720 } 1721 PetscCall(PetscLogFlops(mumps->id.RINFO(2))); 1722 PetscFunctionReturn(0); 1723 } 1724 1725 /* Sets MUMPS options from the options database */ 1726 PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 1727 { 1728 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1729 PetscMUMPSInt icntl=0,size,*listvar_schur; 1730 PetscInt info[80],i,ninfo=80,rbs,cbs; 1731 PetscBool flg=PETSC_FALSE,schur=(PetscBool)(mumps->id.ICNTL(26) == -1); 1732 MumpsScalar *arr; 1733 1734 PetscFunctionBegin; 1735 PetscOptionsBegin(PetscObjectComm((PetscObject)F),((PetscObject)F)->prefix,"MUMPS Options","Mat"); 1736 if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 1737 PetscInt nthreads = 0; 1738 PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 1739 PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 1740 1741 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1742 PetscCallMPI(MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size)); 1743 PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm,&mumps->myid));/* "if (!myid)" still works even if mumps_comm is different */ 1744 1745 PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads","Convert MPI processes into OpenMP threads","None",&mumps->use_petsc_omp_support)); 1746 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1747 /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 1748 PetscCall(PetscOptionsGetInt(NULL,((PetscObject)F)->prefix,"-mat_mumps_use_omp_threads",&nthreads,NULL)); 1749 if (mumps->use_petsc_omp_support) { 1750 PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT),PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",((PetscObject)F)->prefix?((PetscObject)F)->prefix:""); 1751 PetscCheck(!schur,PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature",((PetscObject)F)->prefix?((PetscObject)F)->prefix:""); 1752 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1753 PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl)); 1754 PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master)); 1755 #endif 1756 } else { 1757 mumps->omp_comm = PETSC_COMM_SELF; 1758 mumps->mumps_comm = mumps->petsc_comm; 1759 mumps->is_omp_master = PETSC_TRUE; 1760 } 1761 PetscCallMPI(MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size)); 1762 mumps->reqs = NULL; 1763 mumps->tag = 0; 1764 1765 if (mumps->mumps_comm != MPI_COMM_NULL) { 1766 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 1767 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. 
*/ 1768 MPI_Comm comm; 1769 PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm,&comm)); 1770 mumps->mumps_comm = comm; 1771 } else PetscCall(PetscCommGetComm(mumps->petsc_comm,&mumps->mumps_comm)); 1772 } 1773 1774 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 1775 mumps->id.job = JOB_INIT; 1776 mumps->id.par = 1; /* host participates factorizaton and solve */ 1777 mumps->id.sym = mumps->sym; 1778 1779 size = mumps->id.size_schur; 1780 arr = mumps->id.schur; 1781 listvar_schur = mumps->id.listvar_schur; 1782 PetscMUMPS_c(mumps); 1783 PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS: INFOG(1)=%d",mumps->id.INFOG(1)); 1784 /* restore cached ICNTL and CNTL values */ 1785 for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1+2*icntl]) = mumps->ICNTL_pre[2+2*icntl]; 1786 for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1+2*icntl]) = mumps->CNTL_pre[2+2*icntl]; 1787 PetscCall(PetscFree(mumps->ICNTL_pre)); 1788 PetscCall(PetscFree(mumps->CNTL_pre)); 1789 1790 if (schur) { 1791 mumps->id.size_schur = size; 1792 mumps->id.schur_lld = size; 1793 mumps->id.schur = arr; 1794 mumps->id.listvar_schur = listvar_schur; 1795 if (mumps->petsc_size > 1) { 1796 PetscBool gs; /* gs is false if any rank other than root has non-empty IS */ 1797 1798 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 1799 gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 1800 PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm)); 1801 PetscCheck(gs,PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc"); 1802 } else { 1803 if (F->factortype == MAT_FACTOR_LU) { 1804 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 1805 } else { 1806 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 1807 } 1808 } 1809 mumps->id.ICNTL(26) = -1; 1810 } 1811 1812 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 1813 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 
1814 */ 1815 PetscCallMPI(MPI_Bcast(mumps->id.icntl,40,MPI_INT, 0,mumps->omp_comm)); 1816 PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm)); 1817 1818 mumps->scat_rhs = NULL; 1819 mumps->scat_sol = NULL; 1820 1821 /* set PETSc-MUMPS default options - override MUMPS default */ 1822 mumps->id.ICNTL(3) = 0; 1823 mumps->id.ICNTL(4) = 0; 1824 if (mumps->petsc_size == 1) { 1825 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 1826 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 1827 } else { 1828 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 1829 mumps->id.ICNTL(21) = 1; /* distributed solution */ 1830 } 1831 } 1832 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg)); 1833 if (flg) mumps->id.ICNTL(1) = icntl; 1834 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg)); 1835 if (flg) mumps->id.ICNTL(2) = icntl; 1836 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg)); 1837 if (flg) mumps->id.ICNTL(3) = icntl; 1838 1839 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg)); 1840 if (flg) mumps->id.ICNTL(4) = icntl; 1841 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 1842 1843 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg)); 1844 if (flg) mumps->id.ICNTL(6) = icntl; 1845 1846 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis. 
0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)","None",mumps->id.ICNTL(7),&icntl,&flg)); 1847 if (flg) { 1848 PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 1849 mumps->id.ICNTL(7) = icntl; 1850 } 1851 1852 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL)); 1853 /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 1854 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL)); 1855 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL)); 1856 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL)); 1857 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL)); 1858 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL)); 1859 PetscCall(MatGetBlockSizes(A,&rbs,&cbs)); 1860 if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs; 1861 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15","ICNTL(15): compression of the input matrix resulting from a block format","None",mumps->id.ICNTL(15),&mumps->id.ICNTL(15),&flg)); 1862 if (flg) { 1863 PetscCheck(mumps->id.ICNTL(15) <= 0,PETSC_COMM_SELF,PETSC_ERR_SUP,"Positive -mat_mumps_icntl_15 not handled"); 1864 PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 1865 } 1866 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL)); 1867 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 1868 PetscCall(MatDestroy(&F->schur)); 1869 PetscCall(MatMumpsResetSchur_Private(mumps)); 1870 } 1871 1872 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 1873 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 1874 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 1875 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 1876 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 1877 In short, we could not use distributed RHS with MPICH until v4.0b1. 
1878 */ 1879 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101)) 1880 mumps->ICNTL20 = 0; /* Centralized dense RHS*/ 1881 #else 1882 mumps->ICNTL20 = 10; /* Distributed dense RHS*/ 1883 #endif 1884 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20","ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides","None",mumps->ICNTL20,&mumps->ICNTL20,&flg)); 1885 PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0,PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10",(int)mumps->ICNTL20); 1886 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0) 1887 PetscCheck(!flg || mumps->ICNTL20 != 10,PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=10 is not supported before MUMPS-5.3.0"); 1888 #endif 1889 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */ 1890 1891 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL)); 1892 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL)); 1893 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL)); 1894 if (mumps->id.ICNTL(24)) { 1895 mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ 1896 } 1897 1898 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL)); 1899 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL)); 1900 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27","ICNTL(27): controls the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL)); 1901 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL)); 1902 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL)); 1903 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */ 1904 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL)); 1905 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */ 1906 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL)); 1907 
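  /* The next options control MUMPS' Block Low-Rank (BLR) feature. As a usage sketch (user code, not
     part of this interface), BLR is typically enabled either from the command line, e.g.
       -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_35 1 -mat_mumps_cntl_7 1e-8
     or programmatically after MatGetFactor() with MatMumpsSetIcntl(F,35,1) and MatMumpsSetCntl(F,7,1e-8). */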
PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL)); 1908 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL)); 1909 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL)); 1910 1911 PetscCall(PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL)); 1912 PetscCall(PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL)); 1913 PetscCall(PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL)); 1914 PetscCall(PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL)); 1915 PetscCall(PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL)); 1916 PetscCall(PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL)); 1917 1918 PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL)); 1919 1920 PetscCall(PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL)); 1921 if (ninfo) { 1922 PetscCheck(ninfo <= 80,PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %" PetscInt_FMT " must <= 80",ninfo); 1923 PetscCall(PetscMalloc1(ninfo,&mumps->info)); 1924 mumps->ninfo = ninfo; 1925 for (i=0; i<ninfo; i++) { 1926 PetscCheck(info[i] >= 0 && info[i] <= 80,PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %" PetscInt_FMT " must between 1 and 80",ninfo); 1927 mumps->info[i] = info[i]; 1928 } 1929 } 1930 PetscOptionsEnd(); 1931 PetscFunctionReturn(0); 1932 } 1933 1934 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps) 1935 { 1936 PetscFunctionBegin; 1937 if (mumps->id.INFOG(1) < 0) { 1938 PetscCheck(!A->erroriffailure,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d",mumps->id.INFOG(1)); 1939 if (mumps->id.INFOG(1) == -6) { 1940 PetscCall(PetscInfo(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2))); 1941 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 1942 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 1943 PetscCall(PetscInfo(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2))); 1944 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1945 } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) { 1946 PetscCall(PetscInfo(F,"Empty matrix\n")); 1947 } else { 1948 PetscCall(PetscInfo(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2))); 1949 F->factorerrortype = MAT_FACTOR_OTHER; 1950 } 1951 } 1952 PetscFunctionReturn(0); 1953 } 1954 1955 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1956 { 1957 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1958 Vec b; 1959 const PetscInt M = A->rmap->N; 1960 1961 
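  /* Analysis/symbolic phase (JOB=1). A sketch of the flow below: read the options database, convert A
     into (i,j,v) triplets, gather them on the OpenMP master ranks if needed, then call MUMPS with either
     a centralized assembled matrix (ICNTL(18)=0, uniprocessor case) or a distributed one (ICNTL(18)=3).
     A user-supplied row permutation r, if any, is passed to MUMPS as perm_in with ICNTL(7)=1. */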
PetscFunctionBegin; 1962 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 1963 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 1964 PetscFunctionReturn(0); 1965 } 1966 1967 /* Set MUMPS options from the options database */ 1968 PetscCall(MatSetFromOptions_MUMPS(F,A)); 1969 1970 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 1971 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps)); 1972 1973 /* analysis phase */ 1974 /*----------------*/ 1975 mumps->id.job = JOB_FACTSYMBOLIC; 1976 mumps->id.n = M; 1977 switch (mumps->id.ICNTL(18)) { 1978 case 0: /* centralized assembled matrix input */ 1979 if (!mumps->myid) { 1980 mumps->id.nnz = mumps->nnz; 1981 mumps->id.irn = mumps->irn; 1982 mumps->id.jcn = mumps->jcn; 1983 if (mumps->id.ICNTL(6)>1) mumps->id.a = (MumpsScalar*)mumps->val; 1984 if (r) { 1985 mumps->id.ICNTL(7) = 1; 1986 if (!mumps->myid) { 1987 const PetscInt *idx; 1988 PetscInt i; 1989 1990 PetscCall(PetscMalloc1(M,&mumps->id.perm_in)); 1991 PetscCall(ISGetIndices(r,&idx)); 1992 for (i=0; i<M; i++) PetscCall(PetscMUMPSIntCast(idx[i]+1,&(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */ 1993 PetscCall(ISRestoreIndices(r,&idx)); 1994 } 1995 } 1996 } 1997 break; 1998 case 3: /* distributed assembled matrix input (size>1) */ 1999 mumps->id.nnz_loc = mumps->nnz; 2000 mumps->id.irn_loc = mumps->irn; 2001 mumps->id.jcn_loc = mumps->jcn; 2002 if (mumps->id.ICNTL(6)>1) mumps->id.a_loc = (MumpsScalar*)mumps->val; 2003 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2004 PetscCall(MatCreateVecs(A,NULL,&b)); 2005 PetscCall(VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq)); 2006 PetscCall(VecDestroy(&b)); 2007 } 2008 break; 2009 } 2010 PetscMUMPS_c(mumps); 2011 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps)); 2012 2013 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2014 F->ops->solve = MatSolve_MUMPS; 2015 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2016 F->ops->matsolve = MatMatSolve_MUMPS; 2017 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2018 2019 mumps->matstruc = SAME_NONZERO_PATTERN; 2020 PetscFunctionReturn(0); 2021 } 2022 2023 /* Note the Petsc r and c permutations are ignored */ 2024 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 2025 { 2026 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 2027 Vec b; 2028 const PetscInt M = A->rmap->N; 2029 2030 PetscFunctionBegin; 2031 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2032 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2033 PetscFunctionReturn(0); 2034 } 2035 2036 /* Set MUMPS options from the options database */ 2037 PetscCall(MatSetFromOptions_MUMPS(F,A)); 2038 2039 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2040 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps)); 2041 2042 /* analysis phase */ 2043 /*----------------*/ 2044 mumps->id.job = JOB_FACTSYMBOLIC; 2045 mumps->id.n = M; 2046 switch (mumps->id.ICNTL(18)) { 2047 case 0: /* centralized assembled matrix input */ 2048 if (!mumps->myid) { 2049 mumps->id.nnz = mumps->nnz; 2050 mumps->id.irn = mumps->irn; 2051 mumps->id.jcn = mumps->jcn; 2052 if (mumps->id.ICNTL(6)>1) { 2053 mumps->id.a = (MumpsScalar*)mumps->val; 2054 } 2055 } 2056 break; 2057 case 3: /* distributed assembled matrix input (size>1) */ 2058 mumps->id.nnz_loc = mumps->nnz; 2059 mumps->id.irn_loc = mumps->irn; 2060 mumps->id.jcn_loc = 
mumps->jcn; 2061 if (mumps->id.ICNTL(6)>1) { 2062 mumps->id.a_loc = (MumpsScalar*)mumps->val; 2063 } 2064 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2065 PetscCall(MatCreateVecs(A,NULL,&b)); 2066 PetscCall(VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq)); 2067 PetscCall(VecDestroy(&b)); 2068 } 2069 break; 2070 } 2071 PetscMUMPS_c(mumps); 2072 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps)); 2073 2074 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2075 F->ops->solve = MatSolve_MUMPS; 2076 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2077 2078 mumps->matstruc = SAME_NONZERO_PATTERN; 2079 PetscFunctionReturn(0); 2080 } 2081 2082 /* Note the Petsc r permutation and factor info are ignored */ 2083 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info) 2084 { 2085 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 2086 Vec b; 2087 const PetscInt M = A->rmap->N; 2088 2089 PetscFunctionBegin; 2090 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2091 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2092 PetscFunctionReturn(0); 2093 } 2094 2095 /* Set MUMPS options from the options database */ 2096 PetscCall(MatSetFromOptions_MUMPS(F,A)); 2097 2098 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2099 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps)); 2100 2101 /* analysis phase */ 2102 /*----------------*/ 2103 mumps->id.job = JOB_FACTSYMBOLIC; 2104 mumps->id.n = M; 2105 switch (mumps->id.ICNTL(18)) { 2106 case 0: /* centralized assembled matrix input */ 2107 if (!mumps->myid) { 2108 mumps->id.nnz = mumps->nnz; 2109 mumps->id.irn = mumps->irn; 2110 mumps->id.jcn = mumps->jcn; 2111 if (mumps->id.ICNTL(6)>1) { 2112 mumps->id.a = (MumpsScalar*)mumps->val; 2113 } 2114 } 2115 break; 2116 case 3: /* distributed assembled matrix input (size>1) */ 2117 mumps->id.nnz_loc = mumps->nnz; 2118 mumps->id.irn_loc = mumps->irn; 2119 mumps->id.jcn_loc = mumps->jcn; 2120 if (mumps->id.ICNTL(6)>1) { 2121 mumps->id.a_loc = (MumpsScalar*)mumps->val; 2122 } 2123 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2124 PetscCall(MatCreateVecs(A,NULL,&b)); 2125 PetscCall(VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq)); 2126 PetscCall(VecDestroy(&b)); 2127 } 2128 break; 2129 } 2130 PetscMUMPS_c(mumps); 2131 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps)); 2132 2133 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2134 F->ops->solve = MatSolve_MUMPS; 2135 F->ops->solvetranspose = MatSolve_MUMPS; 2136 F->ops->matsolve = MatMatSolve_MUMPS; 2137 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2138 #if defined(PETSC_USE_COMPLEX) 2139 F->ops->getinertia = NULL; 2140 #else 2141 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2142 #endif 2143 2144 mumps->matstruc = SAME_NONZERO_PATTERN; 2145 PetscFunctionReturn(0); 2146 } 2147 2148 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) 2149 { 2150 PetscBool iascii; 2151 PetscViewerFormat format; 2152 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 2153 2154 PetscFunctionBegin; 2155 /* check if matrix is mumps type */ 2156 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0); 2157 2158 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 2159 if (iascii) { 2160 PetscCall(PetscViewerGetFormat(viewer,&format)); 2161 if (format == PETSC_VIEWER_ASCII_INFO) { 2162 PetscCall(PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n")); 2163 PetscCall(PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d\n",mumps->id.sym)); 2164 PetscCall(PetscViewerASCIIPrintf(viewer," PAR (host participation): %d\n",mumps->id.par)); 2165 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d\n",mumps->id.ICNTL(1))); 2166 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d\n",mumps->id.ICNTL(2))); 2167 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d\n",mumps->id.ICNTL(3))); 2168 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d\n",mumps->id.ICNTL(4))); 2169 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d\n",mumps->id.ICNTL(5))); 2170 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d\n",mumps->id.ICNTL(6))); 2171 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d\n",mumps->id.ICNTL(7))); 2172 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d\n",mumps->id.ICNTL(8))); 2173 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d\n",mumps->id.ICNTL(10))); 2174 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d\n",mumps->id.ICNTL(11))); 2175 if (mumps->id.ICNTL(11)>0) { 2176 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4))); 2177 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5))); 2178 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6))); 2179 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8))); 2180 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9))); 2181 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11))); 2182 } 2183 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d\n",mumps->id.ICNTL(12))); 2184 
PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(13) (sequential factorization of the root node): %d\n",mumps->id.ICNTL(13))); 2185 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d\n",mumps->id.ICNTL(14))); 2186 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(15) (compression of the input matrix): %d\n",mumps->id.ICNTL(15))); 2187 /* ICNTL(15-17) not used */ 2188 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d\n",mumps->id.ICNTL(18))); 2189 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d\n",mumps->id.ICNTL(19))); 2190 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(20) (RHS sparse pattern): %d\n",mumps->id.ICNTL(20))); 2191 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d\n",mumps->id.ICNTL(21))); 2192 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d\n",mumps->id.ICNTL(22))); 2193 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory can be allocated locally):%d\n",mumps->id.ICNTL(23))); 2194 2195 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d\n",mumps->id.ICNTL(24))); 2196 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d\n",mumps->id.ICNTL(25))); 2197 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for RHS or solution): %d\n",mumps->id.ICNTL(26))); 2198 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(27) (blocking size for multiple RHS): %d\n",mumps->id.ICNTL(27))); 2199 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d\n",mumps->id.ICNTL(28))); 2200 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d\n",mumps->id.ICNTL(29))); 2201 2202 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d\n",mumps->id.ICNTL(30))); 2203 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(31) (factors is discarded in the solve phase): %d\n",mumps->id.ICNTL(31))); 2204 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d\n",mumps->id.ICNTL(33))); 2205 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d\n",mumps->id.ICNTL(35))); 2206 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(36) (choice of BLR factorization variant): %d\n",mumps->id.ICNTL(36))); 2207 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(38) (estimated compression rate of LU factors): %d\n",mumps->id.ICNTL(38))); 2208 2209 PetscCall(PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1))); 2210 PetscCall(PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2))); 2211 PetscCall(PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3))); 2212 PetscCall(PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4))); 2213 PetscCall(PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5))); 2214 PetscCall(PetscViewerASCIIPrintf(viewer," CNTL(7) (dropping parameter for BLR): %g \n",mumps->id.CNTL(7))); 2215 2216 /* information local to each processor */ 2217 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n")); 2218 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 2219 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g 
\n",mumps->myid,mumps->id.RINFO(1))); 2220 PetscCall(PetscViewerFlush(viewer)); 2221 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n")); 2222 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(2))); 2223 PetscCall(PetscViewerFlush(viewer)); 2224 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n")); 2225 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(3))); 2226 PetscCall(PetscViewerFlush(viewer)); 2227 2228 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n")); 2229 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(15))); 2230 PetscCall(PetscViewerFlush(viewer)); 2231 2232 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n")); 2233 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(16))); 2234 PetscCall(PetscViewerFlush(viewer)); 2235 2236 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n")); 2237 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(23))); 2238 PetscCall(PetscViewerFlush(viewer)); 2239 2240 if (mumps->ninfo && mumps->ninfo <= 80) { 2241 PetscInt i; 2242 for (i=0; i<mumps->ninfo; i++) { 2243 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "): \n",mumps->info[i])); 2244 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(mumps->info[i]))); 2245 PetscCall(PetscViewerFlush(viewer)); 2246 } 2247 } 2248 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 2249 2250 if (!mumps->myid) { /* information from the host */ 2251 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1))); 2252 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2))); 2253 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3))); 2254 PetscCall(PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34))); 2255 2256 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(3))); 2257 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(4))); 2258 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d\n",mumps->id.INFOG(5))); 2259 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d\n",mumps->id.INFOG(6))); 2260 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively used after analysis): %d\n",mumps->id.INFOG(7))); 2261 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n",mumps->id.INFOG(8))); 2262 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(9) 
(total real/complex workspace to store the matrix factors after factorization): %d\n",mumps->id.INFOG(9))); 2263 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d\n",mumps->id.INFOG(10))); 2264 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d\n",mumps->id.INFOG(11))); 2265 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d\n",mumps->id.INFOG(12))); 2266 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d\n",mumps->id.INFOG(13))); 2267 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d\n",mumps->id.INFOG(14))); 2268 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d\n",mumps->id.INFOG(15))); 2269 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n",mumps->id.INFOG(16))); 2270 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n",mumps->id.INFOG(17))); 2271 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n",mumps->id.INFOG(18))); 2272 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n",mumps->id.INFOG(19))); 2273 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d\n",mumps->id.INFOG(20))); 2274 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n",mumps->id.INFOG(21))); 2275 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n",mumps->id.INFOG(22))); 2276 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n",mumps->id.INFOG(23))); 2277 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n",mumps->id.INFOG(24))); 2278 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n",mumps->id.INFOG(25))); 2279 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28))); 2280 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29))); 2281 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31))); 2282 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32))); 2283 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33))); 2284 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34))); 2285 
PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35))); 2286 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(36))); 2287 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n",mumps->id.INFOG(37))); 2288 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(38))); 2289 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n",mumps->id.INFOG(39))); 2290 } 2291 } 2292 } 2293 PetscFunctionReturn(0); 2294 } 2295 2296 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info) 2297 { 2298 Mat_MUMPS *mumps =(Mat_MUMPS*)A->data; 2299 2300 PetscFunctionBegin; 2301 info->block_size = 1.0; 2302 info->nz_allocated = mumps->id.INFOG(20); 2303 info->nz_used = mumps->id.INFOG(20); 2304 info->nz_unneeded = 0.0; 2305 info->assemblies = 0.0; 2306 info->mallocs = 0.0; 2307 info->memory = 0.0; 2308 info->fill_ratio_given = 0; 2309 info->fill_ratio_needed = 0; 2310 info->factor_mallocs = 0; 2311 PetscFunctionReturn(0); 2312 } 2313 2314 /* -------------------------------------------------------------------------------------------*/ 2315 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 2316 { 2317 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2318 const PetscScalar *arr; 2319 const PetscInt *idxs; 2320 PetscInt size,i; 2321 2322 PetscFunctionBegin; 2323 PetscCall(ISGetLocalSize(is,&size)); 2324 /* Schur complement matrix */ 2325 PetscCall(MatDestroy(&F->schur)); 2326 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur)); 2327 PetscCall(MatDenseGetArrayRead(F->schur,&arr)); 2328 mumps->id.schur = (MumpsScalar*)arr; 2329 mumps->id.size_schur = size; 2330 mumps->id.schur_lld = size; 2331 PetscCall(MatDenseRestoreArrayRead(F->schur,&arr)); 2332 if (mumps->sym == 1) { 2333 PetscCall(MatSetOption(F->schur,MAT_SPD,PETSC_TRUE)); 2334 } 2335 2336 /* MUMPS expects Fortran style indices */ 2337 PetscCall(PetscFree(mumps->id.listvar_schur)); 2338 PetscCall(PetscMalloc1(size,&mumps->id.listvar_schur)); 2339 PetscCall(ISGetIndices(is,&idxs)); 2340 for (i=0; i<size; i++) PetscCall(PetscMUMPSIntCast(idxs[i]+1,&(mumps->id.listvar_schur[i]))); 2341 PetscCall(ISRestoreIndices(is,&idxs)); 2342 /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 2343 mumps->id.ICNTL(26) = -1; 2344 PetscFunctionReturn(0); 2345 } 2346 2347 /* -------------------------------------------------------------------------------------------*/ 2348 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S) 2349 { 2350 Mat St; 2351 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2352 PetscScalar *array; 2353 #if defined(PETSC_USE_COMPLEX) 2354 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 2355 #endif 2356 2357 PetscFunctionBegin; 2358 PetscCheck(mumps->id.ICNTL(19),PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! 
You should call MatFactorSetSchurIS to enable it"); 2359 PetscCall(MatCreate(PETSC_COMM_SELF,&St)); 2360 PetscCall(MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur)); 2361 PetscCall(MatSetType(St,MATDENSE)); 2362 PetscCall(MatSetUp(St)); 2363 PetscCall(MatDenseGetArray(St,&array)); 2364 if (!mumps->sym) { /* MUMPS always return a full matrix */ 2365 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2366 PetscInt i,j,N=mumps->id.size_schur; 2367 for (i=0;i<N;i++) { 2368 for (j=0;j<N;j++) { 2369 #if !defined(PETSC_USE_COMPLEX) 2370 PetscScalar val = mumps->id.schur[i*N+j]; 2371 #else 2372 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2373 #endif 2374 array[j*N+i] = val; 2375 } 2376 } 2377 } else { /* stored by columns */ 2378 PetscCall(PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur)); 2379 } 2380 } else { /* either full or lower-triangular (not packed) */ 2381 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2382 PetscInt i,j,N=mumps->id.size_schur; 2383 for (i=0;i<N;i++) { 2384 for (j=i;j<N;j++) { 2385 #if !defined(PETSC_USE_COMPLEX) 2386 PetscScalar val = mumps->id.schur[i*N+j]; 2387 #else 2388 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2389 #endif 2390 array[i*N+j] = val; 2391 array[j*N+i] = val; 2392 } 2393 } 2394 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2395 PetscCall(PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur)); 2396 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2397 PetscInt i,j,N=mumps->id.size_schur; 2398 for (i=0;i<N;i++) { 2399 for (j=0;j<i+1;j++) { 2400 #if !defined(PETSC_USE_COMPLEX) 2401 PetscScalar val = mumps->id.schur[i*N+j]; 2402 #else 2403 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2404 #endif 2405 array[i*N+j] = val; 2406 array[j*N+i] = val; 2407 } 2408 } 2409 } 2410 } 2411 PetscCall(MatDenseRestoreArray(St,&array)); 2412 *S = St; 2413 PetscFunctionReturn(0); 2414 } 2415 2416 /* -------------------------------------------------------------------------------------------*/ 2417 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival) 2418 { 2419 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2420 2421 PetscFunctionBegin; 2422 if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called */ 2423 PetscInt i,nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */ 2424 for (i = 0; i < nICNTL_pre; ++i) if (mumps->ICNTL_pre[1+2*i] == icntl) break; /* is this ICNTL already cached? 
*/ 2425 if (i == nICNTL_pre) { /* not already cached */ 2426 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt)*(2*nICNTL_pre + 3),&mumps->ICNTL_pre)); 2427 else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt)*3,&mumps->ICNTL_pre)); 2428 mumps->ICNTL_pre[0]++; 2429 } 2430 mumps->ICNTL_pre[1+2*i] = icntl; 2431 PetscCall(PetscMUMPSIntCast(ival,mumps->ICNTL_pre+2+2*i)); 2432 } else PetscCall(PetscMUMPSIntCast(ival,&mumps->id.ICNTL(icntl))); 2433 PetscFunctionReturn(0); 2434 } 2435 2436 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival) 2437 { 2438 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2439 2440 PetscFunctionBegin; 2441 *ival = mumps->id.ICNTL(icntl); 2442 PetscFunctionReturn(0); 2443 } 2444 2445 /*@ 2446 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2447 2448 Logically Collective on Mat 2449 2450 Input Parameters: 2451 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2452 . icntl - index of MUMPS parameter array ICNTL() 2453 - ival - value of MUMPS ICNTL(icntl) 2454 2455 Options Database: 2456 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival 2457 2458 Level: beginner 2459 2460 References: 2461 . * - MUMPS Users' Guide 2462 2463 .seealso: `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2464 @*/ 2465 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival) 2466 { 2467 PetscFunctionBegin; 2468 PetscValidType(F,1); 2469 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2470 PetscValidLogicalCollectiveInt(F,icntl,2); 2471 PetscValidLogicalCollectiveInt(F,ival,3); 2472 PetscCheck(icntl >= 1 && icntl <= 38,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONG,"Unsupported ICNTL value %" PetscInt_FMT,icntl); 2473 PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival)); 2474 PetscFunctionReturn(0); 2475 } 2476 2477 /*@ 2478 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2479 2480 Logically Collective on Mat 2481 2482 Input Parameters: 2483 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2484 - icntl - index of MUMPS parameter array ICNTL() 2485 2486 Output Parameter: 2487 . ival - value of MUMPS ICNTL(icntl) 2488 2489 Level: beginner 2490 2491 References: 2492 . 
* - MUMPS Users' Guide 2493 2494 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2495 @*/ 2496 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival) 2497 { 2498 PetscFunctionBegin; 2499 PetscValidType(F,1); 2500 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2501 PetscValidLogicalCollectiveInt(F,icntl,2); 2502 PetscValidIntPointer(ival,3); 2503 PetscCheck(icntl >= 1 && icntl <= 38,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONG,"Unsupported ICNTL value %" PetscInt_FMT,icntl); 2504 PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival)); 2505 PetscFunctionReturn(0); 2506 } 2507 2508 /* -------------------------------------------------------------------------------------------*/ 2509 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val) 2510 { 2511 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2512 2513 PetscFunctionBegin; 2514 if (mumps->id.job == JOB_NULL) { 2515 PetscInt i,nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2516 for (i = 0; i < nCNTL_pre; ++i) if (mumps->CNTL_pre[1+2*i] == icntl) break; 2517 if (i == nCNTL_pre) { 2518 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal)*(2*nCNTL_pre + 3),&mumps->CNTL_pre)); 2519 else PetscCall(PetscCalloc(sizeof(PetscReal)*3,&mumps->CNTL_pre)); 2520 mumps->CNTL_pre[0]++; 2521 } 2522 mumps->CNTL_pre[1+2*i] = icntl; 2523 mumps->CNTL_pre[2+2*i] = val; 2524 } else mumps->id.CNTL(icntl) = val; 2525 PetscFunctionReturn(0); 2526 } 2527 2528 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val) 2529 { 2530 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2531 2532 PetscFunctionBegin; 2533 *val = mumps->id.CNTL(icntl); 2534 PetscFunctionReturn(0); 2535 } 2536 2537 /*@ 2538 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2539 2540 Logically Collective on Mat 2541 2542 Input Parameters: 2543 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2544 . icntl - index of MUMPS parameter array CNTL() 2545 - val - value of MUMPS CNTL(icntl) 2546 2547 Options Database: 2548 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival 2549 2550 Level: beginner 2551 2552 References: 2553 . * - MUMPS Users' Guide 2554 2555 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2556 @*/ 2557 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val) 2558 { 2559 PetscFunctionBegin; 2560 PetscValidType(F,1); 2561 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2562 PetscValidLogicalCollectiveInt(F,icntl,2); 2563 PetscValidLogicalCollectiveReal(F,val,3); 2564 PetscCheck(icntl >= 1 && icntl <= 7,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONG,"Unsupported CNTL value %" PetscInt_FMT,icntl); 2565 PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val)); 2566 PetscFunctionReturn(0); 2567 } 2568 2569 /*@ 2570 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2571 2572 Logically Collective on Mat 2573 2574 Input Parameters: 2575 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2576 - icntl - index of MUMPS parameter array CNTL() 2577 2578 Output Parameter: 2579 . 
val - value of MUMPS CNTL(icntl) 2580 2581 Level: beginner 2582 2583 References: 2584 . * - MUMPS Users' Guide 2585 2586 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2587 @*/ 2588 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val) 2589 { 2590 PetscFunctionBegin; 2591 PetscValidType(F,1); 2592 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2593 PetscValidLogicalCollectiveInt(F,icntl,2); 2594 PetscValidRealPointer(val,3); 2595 PetscCheck(icntl >= 1 && icntl <= 7,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONG,"Unsupported CNTL value %" PetscInt_FMT,icntl); 2596 PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val)); 2597 PetscFunctionReturn(0); 2598 } 2599 2600 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info) 2601 { 2602 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2603 2604 PetscFunctionBegin; 2605 *info = mumps->id.INFO(icntl); 2606 PetscFunctionReturn(0); 2607 } 2608 2609 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog) 2610 { 2611 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2612 2613 PetscFunctionBegin; 2614 *infog = mumps->id.INFOG(icntl); 2615 PetscFunctionReturn(0); 2616 } 2617 2618 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo) 2619 { 2620 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2621 2622 PetscFunctionBegin; 2623 *rinfo = mumps->id.RINFO(icntl); 2624 PetscFunctionReturn(0); 2625 } 2626 2627 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog) 2628 { 2629 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2630 2631 PetscFunctionBegin; 2632 *rinfog = mumps->id.RINFOG(icntl); 2633 PetscFunctionReturn(0); 2634 } 2635 2636 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS) 2637 { 2638 Mat Bt = NULL,Btseq = NULL; 2639 PetscBool flg; 2640 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2641 PetscScalar *aa; 2642 PetscInt spnr,*ia,*ja,M,nrhs; 2643 2644 PetscFunctionBegin; 2645 PetscValidPointer(spRHS,2); 2646 PetscCall(PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg)); 2647 if (flg) { 2648 PetscCall(MatTransposeGetMat(spRHS,&Bt)); 2649 } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be type MATTRANSPOSEMAT matrix"); 2650 2651 PetscCall(MatMumpsSetIcntl(F,30,1)); 2652 2653 if (mumps->petsc_size > 1) { 2654 Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data; 2655 Btseq = b->A; 2656 } else { 2657 Btseq = Bt; 2658 } 2659 2660 PetscCall(MatGetSize(spRHS,&M,&nrhs)); 2661 mumps->id.nrhs = nrhs; 2662 mumps->id.lrhs = M; 2663 mumps->id.rhs = NULL; 2664 2665 if (!mumps->myid) { 2666 PetscCall(MatSeqAIJGetArray(Btseq,&aa)); 2667 PetscCall(MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg)); 2668 PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2669 PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs)); 2670 mumps->id.rhs_sparse = (MumpsScalar*)aa; 2671 } else { 2672 mumps->id.irhs_ptr = NULL; 2673 mumps->id.irhs_sparse = NULL; 2674 mumps->id.nz_rhs = 0; 2675 mumps->id.rhs_sparse = NULL; 2676 } 2677 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2678 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2679 2680 /* solve phase */ 2681 /*-------------*/ 2682 mumps->id.job = JOB_SOLVE; 2683 
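  /* With ICNTL(30) enabled above, this "solve" does not compute x = inv(A)*b; instead MUMPS computes the
     entries of inv(A) selected by the sparsity pattern of the sparse RHS and returns them in rhs_sparse
     (i.e., in the array of Btseq) on the host rank */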
PetscMUMPS_c(mumps); 2684 if (mumps->id.INFOG(1) < 0) 2685 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d",mumps->id.INFOG(1),mumps->id.INFO(2)); 2686 2687 if (!mumps->myid) { 2688 PetscCall(MatSeqAIJRestoreArray(Btseq,&aa)); 2689 PetscCall(MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg)); 2690 PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2691 } 2692 PetscFunctionReturn(0); 2693 } 2694 2695 /*@ 2696 MatMumpsGetInverse - Get user-specified set of entries in inverse of A 2697 2698 Logically Collective on Mat 2699 2700 Input Parameters: 2701 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2702 - spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0] 2703 2704 Output Parameter: 2705 . spRHS - requested entries of inverse of A 2706 2707 Level: beginner 2708 2709 References: 2710 . * - MUMPS Users' Guide 2711 2712 .seealso: `MatGetFactor()`, `MatCreateTranspose()` 2713 @*/ 2714 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS) 2715 { 2716 PetscFunctionBegin; 2717 PetscValidType(F,1); 2718 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2719 PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS)); 2720 PetscFunctionReturn(0); 2721 } 2722 2723 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST) 2724 { 2725 Mat spRHS; 2726 2727 PetscFunctionBegin; 2728 PetscCall(MatCreateTranspose(spRHST,&spRHS)); 2729 PetscCall(MatMumpsGetInverse_MUMPS(F,spRHS)); 2730 PetscCall(MatDestroy(&spRHS)); 2731 PetscFunctionReturn(0); 2732 } 2733 2734 /*@ 2735 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T 2736 2737 Logically Collective on Mat 2738 2739 Input Parameters: 2740 + F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface 2741 - spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0] 2742 2743 Output Parameter: 2744 . spRHST - requested entries of inverse of A^T 2745 2746 Level: beginner 2747 2748 References: 2749 . * - MUMPS Users' Guide 2750 2751 .seealso: `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 2752 @*/ 2753 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST) 2754 { 2755 PetscBool flg; 2756 2757 PetscFunctionBegin; 2758 PetscValidType(F,1); 2759 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2760 PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL)); 2761 PetscCheck(flg,PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix"); 2762 2763 PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST)); 2764 PetscFunctionReturn(0); 2765 } 2766 2767 /*@ 2768 MatMumpsGetInfo - Get MUMPS parameter INFO() 2769 2770 Logically Collective on Mat 2771 2772 Input Parameters: 2773 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2774 - icntl - index of MUMPS parameter array INFO() 2775 2776 Output Parameter: 2777 . ival - value of MUMPS INFO(icntl) 2778 2779 Level: beginner 2780 2781 References: 2782 . 
* - MUMPS Users' Guide 2783 2784 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2785 @*/ 2786 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival) 2787 { 2788 PetscFunctionBegin; 2789 PetscValidType(F,1); 2790 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2791 PetscValidIntPointer(ival,3); 2792 PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival)); 2793 PetscFunctionReturn(0); 2794 } 2795 2796 /*@ 2797 MatMumpsGetInfog - Get MUMPS parameter INFOG() 2798 2799 Logically Collective on Mat 2800 2801 Input Parameters: 2802 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2803 - icntl - index of MUMPS parameter array INFOG() 2804 2805 Output Parameter: 2806 . ival - value of MUMPS INFOG(icntl) 2807 2808 Level: beginner 2809 2810 References: 2811 . * - MUMPS Users' Guide 2812 2813 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2814 @*/ 2815 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival) 2816 { 2817 PetscFunctionBegin; 2818 PetscValidType(F,1); 2819 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2820 PetscValidIntPointer(ival,3); 2821 PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival)); 2822 PetscFunctionReturn(0); 2823 } 2824 2825 /*@ 2826 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 2827 2828 Logically Collective on Mat 2829 2830 Input Parameters: 2831 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2832 - icntl - index of MUMPS parameter array RINFO() 2833 2834 Output Parameter: 2835 . val - value of MUMPS RINFO(icntl) 2836 2837 Level: beginner 2838 2839 References: 2840 . * - MUMPS Users' Guide 2841 2842 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()` 2843 @*/ 2844 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val) 2845 { 2846 PetscFunctionBegin; 2847 PetscValidType(F,1); 2848 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2849 PetscValidRealPointer(val,3); 2850 PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val)); 2851 PetscFunctionReturn(0); 2852 } 2853 2854 /*@ 2855 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 2856 2857 Logically Collective on Mat 2858 2859 Input Parameters: 2860 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2861 - icntl - index of MUMPS parameter array RINFOG() 2862 2863 Output Parameter: 2864 . val - value of MUMPS RINFOG(icntl) 2865 2866 Level: beginner 2867 2868 References: 2869 . 
* - MUMPS Users' Guide

.seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
@*/
PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val)
{
  PetscFunctionBegin;
  PetscValidType(F,1);
  PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidRealPointer(val,3);
  PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));
  PetscFunctionReturn(0);
}

/*MC
  MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for
  distributed and sequential matrices via the external package MUMPS.

  Works with MATAIJ and MATSBAIJ matrices

  Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS

  Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below.

  Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver

  Options Database Keys:
+ -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
. -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warnings
. -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host
. -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4)
. -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7)
. -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
    Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
. -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77)
. -mat_mumps_icntl_10 - ICNTL(10): max num of refinements
. -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view)
. -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
. -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
. -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space
. -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format
. -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement
. -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
. -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
. -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor
. -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
. -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis
. -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed
. -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
. -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
. -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
. -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
. -mat_mumps_icntl_33 - ICNTL(33): compute determinant
. -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
. -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
. -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
. -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold
. -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement
. -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
. -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
. -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
. -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
- -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) were called before calling MUMPS.
  The default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.

  Level: beginner

  Notes:
  MUMPS Cholesky does not handle (complex) Hermitian matrices (see the MUMPS Users' Guide, http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf), so using it on a Hermitian matrix will produce an error.

  When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
  `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.

  When a MUMPS factorization fails inside a KSP solve, for example with KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling
$ KSPGetPC(ksp,&pc);
$ PCFactorGetMatrix(pc,&mat);
$ MatMumpsGetInfo(mat,....);
$ MatMumpsGetInfog(mat,....); etc.
  Or you can run with -ksp_error_if_not_converged and the program will be stopped and the information printed in the error message.

  Using MUMPS with 64-bit integers
  MUMPS provides 64-bit integer support in two build modes:
  full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
  requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).

  selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
  MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
  columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
  integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.

  With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.

  Two modes to run MUMPS/PETSc with OpenMP
$ Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
$ threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".

$ Use -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
$ if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"

  To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
  (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc
  (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
  libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
  (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).

  If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
  processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
  size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and the others in the omp_comm
  are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slave ranks are not. We free the CPUs assigned to slaves (which might be set
  by CPU binding policies in job scripts) and make them available to the master so that the OpenMP threads spawned by MUMPS can run on them.
  On a multi-socket compute node, MPI rank mapping matters. Continuing the example above, suppose your compute node has two sockets.
  If you interleave MPI ranks across the two sockets (even ranks on socket 0, odd ranks on socket 1) and bind MPI ranks to cores,
  then with -mat_mumps_use_omp_threads 16 a master rank (and the threads it spawns) will use half of the cores in socket 0 and half of the
  cores in socket 1, which hurts locality. If instead you map MPI ranks consecutively onto the two sockets, the problem does not occur.
  Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding.
  For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
  examine the mapping result.

  PETSc does not control thread binding in MUMPS. To get the best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts,
  for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc
  calls omp_set_num_threads(m) internally before calling MUMPS.

  References:
+ * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
- * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
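
  Example usage (an illustrative sketch, not a complete program; it assumes a KSP named ksp whose operator is already set, omits variable declarations and error checking, and uses ICNTL(14)/INFOG(1) only as examples):
$ KSPGetPC(ksp,&pc);
$ PCSetType(pc,PCLU);
$ PCFactorSetMatSolverType(pc,MATSOLVERMUMPS);
$ PCFactorSetUpMatSolverType(pc);   /* create the factor matrix so MUMPS options can be set on it */
$ PCFactorGetMatrix(pc,&F);
$ MatMumpsSetIcntl(F,14,30);        /* ICNTL(14): increase the estimated working space by 30 percent */
$ KSPSolve(ksp,b,x);
$ MatMumpsGetInfog(F,1,&ival);      /* INFOG(1): MUMPS return status, negative on error */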
2994 2995 .seealso: `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()` 2996 2997 M*/ 2998 2999 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type) 3000 { 3001 PetscFunctionBegin; 3002 *type = MATSOLVERMUMPS; 3003 PetscFunctionReturn(0); 3004 } 3005 3006 /* MatGetFactor for Seq and MPI AIJ matrices */ 3007 static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F) 3008 { 3009 Mat B; 3010 Mat_MUMPS *mumps; 3011 PetscBool isSeqAIJ; 3012 PetscMPIInt size; 3013 3014 PetscFunctionBegin; 3015 #if defined(PETSC_USE_COMPLEX) 3016 PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || ftype != MAT_FACTOR_CHOLESKY,PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported"); 3017 #endif 3018 /* Create the factorization matrix */ 3019 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ)); 3020 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 3021 PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 3022 PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name)); 3023 PetscCall(MatSetUp(B)); 3024 3025 PetscCall(PetscNewLog(B,&mumps)); 3026 3027 B->ops->view = MatView_MUMPS; 3028 B->ops->getinfo = MatGetInfo_MUMPS; 3029 3030 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps)); 3031 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS)); 3032 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS)); 3033 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS)); 3034 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS)); 3035 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS)); 3036 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS)); 3037 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS)); 3038 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS)); 3039 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS)); 3040 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS)); 3041 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS)); 3042 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS)); 3043 3044 if (ftype == MAT_FACTOR_LU) { 3045 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3046 B->factortype = MAT_FACTOR_LU; 3047 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3048 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 3049 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU])); 3050 mumps->sym = 0; 3051 } else { 3052 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3053 B->factortype = MAT_FACTOR_CHOLESKY; 3054 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3055 else mumps->ConvertToTriples = 
MatConvertToTriples_mpiaij_mpisbaij; 3056 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3057 #if defined(PETSC_USE_COMPLEX) 3058 mumps->sym = 2; 3059 #else 3060 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3061 else mumps->sym = 2; 3062 #endif 3063 } 3064 3065 /* set solvertype */ 3066 PetscCall(PetscFree(B->solvertype)); 3067 PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype)); 3068 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 3069 if (size == 1) { 3070 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3071 B->canuseordering = PETSC_TRUE; 3072 } 3073 B->ops->destroy = MatDestroy_MUMPS; 3074 B->data = (void*)mumps; 3075 3076 *F = B; 3077 mumps->id.job = JOB_NULL; 3078 mumps->ICNTL_pre = NULL; 3079 mumps->CNTL_pre = NULL; 3080 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3081 PetscFunctionReturn(0); 3082 } 3083 3084 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3085 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F) 3086 { 3087 Mat B; 3088 Mat_MUMPS *mumps; 3089 PetscBool isSeqSBAIJ; 3090 PetscMPIInt size; 3091 3092 PetscFunctionBegin; 3093 #if defined(PETSC_USE_COMPLEX) 3094 PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE,PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported"); 3095 #endif 3096 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 3097 PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 3098 PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name)); 3099 PetscCall(MatSetUp(B)); 3100 3101 PetscCall(PetscNewLog(B,&mumps)); 3102 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ)); 3103 if (isSeqSBAIJ) { 3104 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3105 } else { 3106 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3107 } 3108 3109 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3110 B->ops->view = MatView_MUMPS; 3111 B->ops->getinfo = MatGetInfo_MUMPS; 3112 3113 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps)); 3114 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS)); 3115 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS)); 3116 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS)); 3117 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS)); 3118 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS)); 3119 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS)); 3120 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS)); 3121 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS)); 3122 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS)); 3123 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS)); 3124 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS)); 3125 
PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS)); 3126 3127 B->factortype = MAT_FACTOR_CHOLESKY; 3128 #if defined(PETSC_USE_COMPLEX) 3129 mumps->sym = 2; 3130 #else 3131 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3132 else mumps->sym = 2; 3133 #endif 3134 3135 /* set solvertype */ 3136 PetscCall(PetscFree(B->solvertype)); 3137 PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype)); 3138 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 3139 if (size == 1) { 3140 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3141 B->canuseordering = PETSC_TRUE; 3142 } 3143 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3144 B->ops->destroy = MatDestroy_MUMPS; 3145 B->data = (void*)mumps; 3146 3147 *F = B; 3148 mumps->id.job = JOB_NULL; 3149 mumps->ICNTL_pre = NULL; 3150 mumps->CNTL_pre = NULL; 3151 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3152 PetscFunctionReturn(0); 3153 } 3154 3155 static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F) 3156 { 3157 Mat B; 3158 Mat_MUMPS *mumps; 3159 PetscBool isSeqBAIJ; 3160 PetscMPIInt size; 3161 3162 PetscFunctionBegin; 3163 /* Create the factorization matrix */ 3164 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ)); 3165 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 3166 PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 3167 PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name)); 3168 PetscCall(MatSetUp(B)); 3169 3170 PetscCall(PetscNewLog(B,&mumps)); 3171 if (ftype == MAT_FACTOR_LU) { 3172 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3173 B->factortype = MAT_FACTOR_LU; 3174 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3175 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3176 mumps->sym = 0; 3177 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU])); 3178 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 3179 3180 B->ops->view = MatView_MUMPS; 3181 B->ops->getinfo = MatGetInfo_MUMPS; 3182 3183 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps)); 3184 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS)); 3185 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS)); 3186 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS)); 3187 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS)); 3188 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS)); 3189 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS)); 3190 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS)); 3191 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS)); 3192 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS)); 3193 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS)); 3194 
PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS)); 3195 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS)); 3196 3197 /* set solvertype */ 3198 PetscCall(PetscFree(B->solvertype)); 3199 PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype)); 3200 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 3201 if (size == 1) { 3202 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3203 B->canuseordering = PETSC_TRUE; 3204 } 3205 B->ops->destroy = MatDestroy_MUMPS; 3206 B->data = (void*)mumps; 3207 3208 *F = B; 3209 mumps->id.job = JOB_NULL; 3210 mumps->ICNTL_pre = NULL; 3211 mumps->CNTL_pre = NULL; 3212 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3213 PetscFunctionReturn(0); 3214 } 3215 3216 /* MatGetFactor for Seq and MPI SELL matrices */ 3217 static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F) 3218 { 3219 Mat B; 3220 Mat_MUMPS *mumps; 3221 PetscBool isSeqSELL; 3222 PetscMPIInt size; 3223 3224 PetscFunctionBegin; 3225 /* Create the factorization matrix */ 3226 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL)); 3227 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 3228 PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 3229 PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name)); 3230 PetscCall(MatSetUp(B)); 3231 3232 PetscCall(PetscNewLog(B,&mumps)); 3233 3234 B->ops->view = MatView_MUMPS; 3235 B->ops->getinfo = MatGetInfo_MUMPS; 3236 3237 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps)); 3238 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS)); 3239 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS)); 3240 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS)); 3241 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS)); 3242 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS)); 3243 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS)); 3244 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS)); 3245 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS)); 3246 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS)); 3247 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS)); 3248 3249 if (ftype == MAT_FACTOR_LU) { 3250 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3251 B->factortype = MAT_FACTOR_LU; 3252 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 3253 else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 3254 mumps->sym = 0; 3255 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU])); 3256 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 3257 3258 /* set solvertype */ 3259 PetscCall(PetscFree(B->solvertype)); 3260 PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype)); 3261 
PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 3262 if (size == 1) { 3263 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3264 B->canuseordering = PETSC_TRUE; 3265 } 3266 B->ops->destroy = MatDestroy_MUMPS; 3267 B->data = (void*)mumps; 3268 3269 *F = B; 3270 mumps->id.job = JOB_NULL; 3271 mumps->ICNTL_pre = NULL; 3272 mumps->CNTL_pre = NULL; 3273 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3274 PetscFunctionReturn(0); 3275 } 3276 3277 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 3278 { 3279 PetscFunctionBegin; 3280 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps)); 3281 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps)); 3282 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps)); 3283 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps)); 3284 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps)); 3285 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps)); 3286 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps)); 3287 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps)); 3288 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps)); 3289 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps)); 3290 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps)); 3291 PetscFunctionReturn(0); 3292 } 3293
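
/*
   Addendum to the MatMumpsGetInverse() manual page above: an illustrative sketch, not taken from the PETSc examples.
   Assumptions: F is a MUMPS factor of A that has already been factored numerically, Asub is a MATSEQAIJ matrix on
   rank 0 whose nonzero pattern selects the wanted entries of inv(A), and error checking is omitted:

     MatCreateTranspose(Asub,&spRHS);   // wrap as MATTRANSPOSEMAT, as MatMumpsGetInverse() requires
     MatMumpsGetInverse(F,spRHS);       // requested entries of inv(A) are returned in the values of Asub on rank 0
     MatDestroy(&spRHS);
*/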