1 2 /* 3 Provides an interface to the MUMPS sparse solver 4 */ 5 #include <petscpkg_version.h> 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 8 #include <../src/mat/impls/sell/mpi/mpisell.h> 9 10 EXTERN_C_BEGIN 11 #if defined(PETSC_USE_COMPLEX) 12 #if defined(PETSC_USE_REAL_SINGLE) 13 #include <cmumps_c.h> 14 #else 15 #include <zmumps_c.h> 16 #endif 17 #else 18 #if defined(PETSC_USE_REAL_SINGLE) 19 #include <smumps_c.h> 20 #else 21 #include <dmumps_c.h> 22 #endif 23 #endif 24 EXTERN_C_END 25 #define JOB_INIT -1 26 #define JOB_NULL 0 27 #define JOB_FACTSYMBOLIC 1 28 #define JOB_FACTNUMERIC 2 29 #define JOB_SOLVE 3 30 #define JOB_END -2 31 32 /* calls to MUMPS */ 33 #if defined(PETSC_USE_COMPLEX) 34 #if defined(PETSC_USE_REAL_SINGLE) 35 #define MUMPS_c cmumps_c 36 #else 37 #define MUMPS_c zmumps_c 38 #endif 39 #else 40 #if defined(PETSC_USE_REAL_SINGLE) 41 #define MUMPS_c smumps_c 42 #else 43 #define MUMPS_c dmumps_c 44 #endif 45 #endif 46 47 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for 48 number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the 49 naming convention in PetscMPIInt, PetscBLASInt etc. 50 */ 51 typedef MUMPS_INT PetscMUMPSInt; 52 53 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0) 54 #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */ 55 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 56 #endif 57 #else 58 #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */ 59 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 60 #endif 61 #endif 62 63 #define MPIU_MUMPSINT MPI_INT 64 #define PETSC_MUMPS_INT_MAX 2147483647 65 #define PETSC_MUMPS_INT_MIN -2147483648 66 67 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers*/ 68 static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a,PetscMUMPSInt *b) 69 { 70 PetscFunctionBegin; 71 #if PetscDefined(USE_64BIT_INDICES) 72 PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt"); 73 #endif 74 *b = (PetscMUMPSInt)(a); 75 PetscFunctionReturn(0); 76 } 77 78 /* Put these utility routines here since they are only used in this file */ 79 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject,const char opt[],const char text[],const char man[],PetscMUMPSInt currentvalue,PetscMUMPSInt *value,PetscBool *set,PetscMUMPSInt lb,PetscMUMPSInt ub) 80 { 81 PetscInt myval; 82 PetscBool myset; 83 PetscFunctionBegin; 84 /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */ 85 PetscCall(PetscOptionsInt_Private(PetscOptionsObject,opt,text,man,(PetscInt)currentvalue,&myval,&myset,lb,ub)); 86 if (myset) PetscCall(PetscMUMPSIntCast(myval,value)); 87 if (set) *set = myset; 88 PetscFunctionReturn(0); 89 } 90 #define PetscOptionsMUMPSInt(a,b,c,d,e,f) PetscOptionsMUMPSInt_Private(PetscOptionsObject,a,b,c,d,e,f,PETSC_MUMPS_INT_MIN,PETSC_MUMPS_INT_MAX) 91 92 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. 
Before/after the call, we change/restore CPUs the master ranks can run on */ 93 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 94 #define PetscMUMPS_c(mumps) \ 95 do { \ 96 if (mumps->use_petsc_omp_support) { \ 97 if (mumps->is_omp_master) { \ 98 PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \ 99 MUMPS_c(&mumps->id); \ 100 PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \ 101 } \ 102 PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \ 103 /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \ 104 to processes, so we only Bcast info[1], an error code and leave others (since they do not have \ 105 an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \ 106 omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \ 107 */ \ 108 PetscCallMPI(MPI_Bcast(mumps->id.infog, 40,MPIU_MUMPSINT, 0,mumps->omp_comm));\ 109 PetscCallMPI(MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL, 0,mumps->omp_comm));\ 110 PetscCallMPI(MPI_Bcast(mumps->id.info, 1, MPIU_MUMPSINT, 0,mumps->omp_comm));\ 111 } else { \ 112 MUMPS_c(&mumps->id); \ 113 } \ 114 } while (0) 115 #else 116 #define PetscMUMPS_c(mumps) \ 117 do { MUMPS_c(&mumps->id); } while (0) 118 #endif 119 120 /* declare MumpsScalar */ 121 #if defined(PETSC_USE_COMPLEX) 122 #if defined(PETSC_USE_REAL_SINGLE) 123 #define MumpsScalar mumps_complex 124 #else 125 #define MumpsScalar mumps_double_complex 126 #endif 127 #else 128 #define MumpsScalar PetscScalar 129 #endif 130 131 /* macros s.t. indices match MUMPS documentation */ 132 #define ICNTL(I) icntl[(I)-1] 133 #define CNTL(I) cntl[(I)-1] 134 #define INFOG(I) infog[(I)-1] 135 #define INFO(I) info[(I)-1] 136 #define RINFOG(I) rinfog[(I)-1] 137 #define RINFO(I) rinfo[(I)-1] 138 139 typedef struct Mat_MUMPS Mat_MUMPS; 140 struct Mat_MUMPS { 141 #if defined(PETSC_USE_COMPLEX) 142 #if defined(PETSC_USE_REAL_SINGLE) 143 CMUMPS_STRUC_C id; 144 #else 145 ZMUMPS_STRUC_C id; 146 #endif 147 #else 148 #if defined(PETSC_USE_REAL_SINGLE) 149 SMUMPS_STRUC_C id; 150 #else 151 DMUMPS_STRUC_C id; 152 #endif 153 #endif 154 155 MatStructure matstruc; 156 PetscMPIInt myid,petsc_size; 157 PetscMUMPSInt *irn,*jcn; /* the (i,j,v) triplets passed to mumps. */ 158 PetscScalar *val,*val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */ 159 PetscInt64 nnz; /* number of nonzeros. 
The type is called selective 64-bit in MUMPS. */
  PetscMUMPSInt  sym;
  MPI_Comm       mumps_comm;
  PetscMUMPSInt  *ICNTL_pre;
  PetscReal      *CNTL_pre;
  PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
  PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
  PetscMUMPSInt  lrhs_loc,nloc_rhs,*irhs_loc;
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  PetscInt       *rhs_nrow,max_nrhs;
  PetscMPIInt    *rhs_recvcounts,*rhs_disps;
  PetscScalar    *rhs_loc,*rhs_recvbuf;
#endif
  Vec            b_seq,x_seq;
  PetscInt       ninfo,*info;          /* which INFO to display */
  PetscInt       sizeredrhs;
  PetscScalar    *schur_sol;
  PetscInt       schur_sizesol;
  PetscMUMPSInt  *ia_alloc,*ja_alloc;  /* work arrays used for the CSR struct of the sparse rhs */
  PetscInt64     cur_ilen,cur_jlen;    /* current lengths of ia_alloc[], ja_alloc[] */
  PetscErrorCode (*ConvertToTriples)(Mat,PetscInt,MatReuse,Mat_MUMPS*);

  /* stuff used by petsc/mumps OpenMP support */
  PetscBool      use_petsc_omp_support;
  PetscOmpCtrl   omp_ctrl;             /* an OpenMP controller under which blocked processes release their CPUs (MPI_Barrier does not give this guarantee) */
  MPI_Comm       petsc_comm,omp_comm;  /* petsc_comm is the petsc matrix's comm */
  PetscInt64     *recvcount;           /* a collection of nnz on omp_master */
  PetscMPIInt    tag,omp_comm_size;
  PetscBool      is_omp_master;        /* is this rank the master of omp_comm? */
  MPI_Request    *reqs;
};

/* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
   Here, nrow is the number of rows, ia[] is the row pointer and ja[] holds the column indices.
*/
static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps,PetscInt nrow,PetscInt *ia,PetscInt *ja,PetscMUMPSInt **ia_mumps,PetscMUMPSInt **ja_mumps,PetscMUMPSInt *nnz_mumps)
{
  PetscInt nnz=ia[nrow]-1; /* MUMPS uses 1-based indices.
Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */ 198 199 PetscFunctionBegin; 200 #if defined(PETSC_USE_64BIT_INDICES) 201 { 202 PetscInt i; 203 if (nrow+1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */ 204 PetscCall(PetscFree(mumps->ia_alloc)); 205 PetscCall(PetscMalloc1(nrow+1,&mumps->ia_alloc)); 206 mumps->cur_ilen = nrow+1; 207 } 208 if (nnz > mumps->cur_jlen) { 209 PetscCall(PetscFree(mumps->ja_alloc)); 210 PetscCall(PetscMalloc1(nnz,&mumps->ja_alloc)); 211 mumps->cur_jlen = nnz; 212 } 213 for (i=0; i<nrow+1; i++) PetscCall(PetscMUMPSIntCast(ia[i],&(mumps->ia_alloc[i]))); 214 for (i=0; i<nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i],&(mumps->ja_alloc[i]))); 215 *ia_mumps = mumps->ia_alloc; 216 *ja_mumps = mumps->ja_alloc; 217 } 218 #else 219 *ia_mumps = ia; 220 *ja_mumps = ja; 221 #endif 222 PetscCall(PetscMUMPSIntCast(nnz,nnz_mumps)); 223 PetscFunctionReturn(0); 224 } 225 226 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps) 227 { 228 PetscFunctionBegin; 229 PetscCall(PetscFree(mumps->id.listvar_schur)); 230 PetscCall(PetscFree(mumps->id.redrhs)); 231 PetscCall(PetscFree(mumps->schur_sol)); 232 mumps->id.size_schur = 0; 233 mumps->id.schur_lld = 0; 234 mumps->id.ICNTL(19) = 0; 235 PetscFunctionReturn(0); 236 } 237 238 /* solve with rhs in mumps->id.redrhs and return in the same location */ 239 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 240 { 241 Mat_MUMPS *mumps=(Mat_MUMPS*)F->data; 242 Mat S,B,X; 243 MatFactorSchurStatus schurstatus; 244 PetscInt sizesol; 245 246 PetscFunctionBegin; 247 PetscCall(MatFactorFactorizeSchurComplement(F)); 248 PetscCall(MatFactorGetSchurComplement(F,&S,&schurstatus)); 249 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B)); 250 PetscCall(MatSetType(B,((PetscObject)S)->type_name)); 251 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 252 PetscCall(MatBindToCPU(B,S->boundtocpu)); 253 #endif 254 switch (schurstatus) { 255 case MAT_FACTOR_SCHUR_FACTORED: 256 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X)); 257 PetscCall(MatSetType(X,((PetscObject)S)->type_name)); 258 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 259 PetscCall(MatBindToCPU(X,S->boundtocpu)); 260 #endif 261 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 262 PetscCall(MatMatSolveTranspose(S,B,X)); 263 } else { 264 PetscCall(MatMatSolve(S,B,X)); 265 } 266 break; 267 case MAT_FACTOR_SCHUR_INVERTED: 268 sizesol = mumps->id.nrhs*mumps->id.size_schur; 269 if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 270 PetscCall(PetscFree(mumps->schur_sol)); 271 PetscCall(PetscMalloc1(sizesol,&mumps->schur_sol)); 272 mumps->schur_sizesol = sizesol; 273 } 274 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X)); 275 PetscCall(MatSetType(X,((PetscObject)S)->type_name)); 276 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 277 PetscCall(MatBindToCPU(X,S->boundtocpu)); 278 #endif 279 PetscCall(MatProductCreateWithMat(S,B,NULL,X)); 280 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 281 PetscCall(MatProductSetType(X,MATPRODUCT_AtB)); 282 } else { 283 PetscCall(MatProductSetType(X,MATPRODUCT_AB)); 284 } 285 PetscCall(MatProductSetFromOptions(X)); 286 PetscCall(MatProductSymbolic(X)); 287 PetscCall(MatProductNumeric(X)); 288 289 PetscCall(MatCopy(X,B,SAME_NONZERO_PATTERN)); 290 break; 291 default: 292 
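      /* MAT_FACTOR_SCHUR_UNFACTORED (or any other status) is not expected here, since
         MatFactorFactorizeSchurComplement() was called at the top of this routine; treat it as an error */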
SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %d",F->schur_status); 293 } 294 PetscCall(MatFactorRestoreSchurComplement(F,&S,schurstatus)); 295 PetscCall(MatDestroy(&B)); 296 PetscCall(MatDestroy(&X)); 297 PetscFunctionReturn(0); 298 } 299 300 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 301 { 302 Mat_MUMPS *mumps=(Mat_MUMPS*)F->data; 303 304 PetscFunctionBegin; 305 if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 306 PetscFunctionReturn(0); 307 } 308 if (!expansion) { /* prepare for the condensation step */ 309 PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur; 310 /* allocate MUMPS internal array to store reduced right-hand sides */ 311 if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 312 PetscCall(PetscFree(mumps->id.redrhs)); 313 mumps->id.lredrhs = mumps->id.size_schur; 314 PetscCall(PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs)); 315 mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs; 316 } 317 mumps->id.ICNTL(26) = 1; /* condensation phase */ 318 } else { /* prepare for the expansion step */ 319 /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */ 320 PetscCall(MatMumpsSolveSchur_Private(F)); 321 mumps->id.ICNTL(26) = 2; /* expansion phase */ 322 PetscMUMPS_c(mumps); 323 PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1)); 324 /* restore defaults */ 325 mumps->id.ICNTL(26) = -1; 326 /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 327 if (mumps->id.nrhs > 1) { 328 PetscCall(PetscFree(mumps->id.redrhs)); 329 mumps->id.lredrhs = 0; 330 mumps->sizeredrhs = 0; 331 } 332 } 333 PetscFunctionReturn(0); 334 } 335 336 /* 337 MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz] 338 339 input: 340 A - matrix in aij,baij or sbaij format 341 shift - 0: C style output triple; 1: Fortran style output triple. 342 reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 343 MAT_REUSE_MATRIX: only the values in v array are updated 344 output: 345 nnz - dim of r, c, and v (number of local nonzero entries of A) 346 r, c, v - row and col index, matrix values (matrix triples) 347 348 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 349 freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 350 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 
351 352 */ 353 354 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 355 { 356 const PetscScalar *av; 357 const PetscInt *ai,*aj,*ajj,M=A->rmap->n; 358 PetscInt64 nz,rnz,i,j,k; 359 PetscMUMPSInt *row,*col; 360 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 361 362 PetscFunctionBegin; 363 PetscCall(MatSeqAIJGetArrayRead(A,&av)); 364 mumps->val = (PetscScalar*)av; 365 if (reuse == MAT_INITIAL_MATRIX) { 366 nz = aa->nz; 367 ai = aa->i; 368 aj = aa->j; 369 PetscCall(PetscMalloc2(nz,&row,nz,&col)); 370 for (i=k=0; i<M; i++) { 371 rnz = ai[i+1] - ai[i]; 372 ajj = aj + ai[i]; 373 for (j=0; j<rnz; j++) { 374 PetscCall(PetscMUMPSIntCast(i+shift,&row[k])); 375 PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[k])); 376 k++; 377 } 378 } 379 mumps->irn = row; 380 mumps->jcn = col; 381 mumps->nnz = nz; 382 } 383 PetscCall(MatSeqAIJRestoreArrayRead(A,&av)); 384 PetscFunctionReturn(0); 385 } 386 387 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 388 { 389 PetscInt64 nz,i,j,k,r; 390 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 391 PetscMUMPSInt *row,*col; 392 393 PetscFunctionBegin; 394 mumps->val = a->val; 395 if (reuse == MAT_INITIAL_MATRIX) { 396 nz = a->sliidx[a->totalslices]; 397 PetscCall(PetscMalloc2(nz,&row,nz,&col)); 398 for (i=k=0; i<a->totalslices; i++) { 399 for (j=a->sliidx[i],r=0; j<a->sliidx[i+1]; j++,r=((r+1)&0x07)) { 400 PetscCall(PetscMUMPSIntCast(8*i+r+shift,&row[k++])); 401 } 402 } 403 for (i=0;i<nz;i++) PetscCall(PetscMUMPSIntCast(a->colidx[i]+shift,&col[i])); 404 mumps->irn = row; 405 mumps->jcn = col; 406 mumps->nnz = nz; 407 } 408 PetscFunctionReturn(0); 409 } 410 411 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 412 { 413 Mat_SeqBAIJ *aa=(Mat_SeqBAIJ*)A->data; 414 const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2; 415 PetscInt64 M,nz,idx=0,rnz,i,j,k,m; 416 PetscInt bs; 417 PetscMUMPSInt *row,*col; 418 419 PetscFunctionBegin; 420 PetscCall(MatGetBlockSize(A,&bs)); 421 M = A->rmap->N/bs; 422 mumps->val = aa->a; 423 if (reuse == MAT_INITIAL_MATRIX) { 424 ai = aa->i; aj = aa->j; 425 nz = bs2*aa->nz; 426 PetscCall(PetscMalloc2(nz,&row,nz,&col)); 427 for (i=0; i<M; i++) { 428 ajj = aj + ai[i]; 429 rnz = ai[i+1] - ai[i]; 430 for (k=0; k<rnz; k++) { 431 for (j=0; j<bs; j++) { 432 for (m=0; m<bs; m++) { 433 PetscCall(PetscMUMPSIntCast(i*bs + m + shift,&row[idx])); 434 PetscCall(PetscMUMPSIntCast(bs*ajj[k] + j + shift,&col[idx])); 435 idx++; 436 } 437 } 438 } 439 } 440 mumps->irn = row; 441 mumps->jcn = col; 442 mumps->nnz = nz; 443 } 444 PetscFunctionReturn(0); 445 } 446 447 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 448 { 449 const PetscInt *ai, *aj,*ajj; 450 PetscInt bs; 451 PetscInt64 nz,rnz,i,j,k,m; 452 PetscMUMPSInt *row,*col; 453 PetscScalar *val; 454 Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)A->data; 455 const PetscInt bs2=aa->bs2,mbs=aa->mbs; 456 #if defined(PETSC_USE_COMPLEX) 457 PetscBool isset,hermitian; 458 #endif 459 460 PetscFunctionBegin; 461 #if defined(PETSC_USE_COMPLEX) 462 PetscCall(MatIsHermitianKnown(A,&isset,&hermitian)); 463 PetscCheck(!isset || !hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 464 #endif 465 ai = aa->i; 466 aj = aa->j; 467 PetscCall(MatGetBlockSize(A,&bs)); 468 if (reuse == MAT_INITIAL_MATRIX) { 469 nz = aa->nz; 470 PetscCall(PetscMalloc2(bs2*nz,&row,bs2*nz,&col)); 471 if (bs>1) { 
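    /* For bs>1 only the upper-triangular part is passed to MUMPS: whole blocks above the diagonal plus
       the upper triangle of each diagonal block (see the ajj[j]>i || k>=m filter below). The values are
       therefore compacted into a separate buffer val_alloc; for bs==1 the SBAIJ value array aa->a is used directly. */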
472 PetscCall(PetscMalloc1(bs2*nz,&mumps->val_alloc)); 473 mumps->val = mumps->val_alloc; 474 } else { 475 mumps->val = aa->a; 476 } 477 mumps->irn = row; 478 mumps->jcn = col; 479 } else { 480 if (bs == 1) mumps->val = aa->a; 481 row = mumps->irn; 482 col = mumps->jcn; 483 } 484 val = mumps->val; 485 486 nz = 0; 487 if (bs>1) { 488 for (i=0; i<mbs; i++) { 489 rnz = ai[i+1] - ai[i]; 490 ajj = aj + ai[i]; 491 for (j=0; j<rnz; j++) { 492 for (k=0; k<bs; k++) { 493 for (m=0; m<bs; m++) { 494 if (ajj[j]>i || k>=m) { 495 if (reuse == MAT_INITIAL_MATRIX) { 496 PetscCall(PetscMUMPSIntCast(i*bs + m + shift,&row[nz])); 497 PetscCall(PetscMUMPSIntCast(ajj[j]*bs + k + shift,&col[nz])); 498 } 499 val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs]; 500 } 501 } 502 } 503 } 504 } 505 } else if (reuse == MAT_INITIAL_MATRIX) { 506 for (i=0; i<mbs; i++) { 507 rnz = ai[i+1] - ai[i]; 508 ajj = aj + ai[i]; 509 for (j=0; j<rnz; j++) { 510 PetscCall(PetscMUMPSIntCast(i+shift,&row[nz])); 511 PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[nz])); 512 nz++; 513 } 514 } 515 PetscCheck(nz == aa->nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT,nz,aa->nz); 516 } 517 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 518 PetscFunctionReturn(0); 519 } 520 521 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 522 { 523 const PetscInt *ai,*aj,*ajj,*adiag,M=A->rmap->n; 524 PetscInt64 nz,rnz,i,j; 525 const PetscScalar *av,*v1; 526 PetscScalar *val; 527 PetscMUMPSInt *row,*col; 528 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 529 PetscBool missing; 530 #if defined(PETSC_USE_COMPLEX) 531 PetscBool hermitian,isset; 532 #endif 533 534 PetscFunctionBegin; 535 #if defined(PETSC_USE_COMPLEX) 536 PetscCall(MatIsHermitianKnown(A,&isset,&hermitian)); 537 PetscCheck(!isset || !hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 538 #endif 539 PetscCall(MatSeqAIJGetArrayRead(A,&av)); 540 ai = aa->i; aj = aa->j; 541 adiag = aa->diag; 542 PetscCall(MatMissingDiagonal_SeqAIJ(A,&missing,NULL)); 543 if (reuse == MAT_INITIAL_MATRIX) { 544 /* count nz in the upper triangular part of A */ 545 nz = 0; 546 if (missing) { 547 for (i=0; i<M; i++) { 548 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 549 for (j=ai[i];j<ai[i+1];j++) { 550 if (aj[j] < i) continue; 551 nz++; 552 } 553 } else { 554 nz += ai[i+1] - adiag[i]; 555 } 556 } 557 } else { 558 for (i=0; i<M; i++) nz += ai[i+1] - adiag[i]; 559 } 560 PetscCall(PetscMalloc2(nz,&row,nz,&col)); 561 PetscCall(PetscMalloc1(nz,&val)); 562 mumps->nnz = nz; 563 mumps->irn = row; 564 mumps->jcn = col; 565 mumps->val = mumps->val_alloc = val; 566 567 nz = 0; 568 if (missing) { 569 for (i=0; i<M; i++) { 570 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 571 for (j=ai[i];j<ai[i+1];j++) { 572 if (aj[j] < i) continue; 573 PetscCall(PetscMUMPSIntCast(i+shift,&row[nz])); 574 PetscCall(PetscMUMPSIntCast(aj[j]+shift,&col[nz])); 575 val[nz] = av[j]; 576 nz++; 577 } 578 } else { 579 rnz = ai[i+1] - adiag[i]; 580 ajj = aj + adiag[i]; 581 v1 = av + adiag[i]; 582 for (j=0; j<rnz; j++) { 583 PetscCall(PetscMUMPSIntCast(i+shift,&row[nz])); 584 PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[nz])); 585 val[nz++] = v1[j]; 586 } 587 } 588 } 589 } else { 590 for (i=0; i<M; i++) { 591 rnz = ai[i+1] - adiag[i]; 592 ajj = aj + adiag[i]; 593 v1 = av + adiag[i]; 594 for (j=0; j<rnz; j++) { 595 PetscCall(PetscMUMPSIntCast(i+shift,&row[nz])); 596 
PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[nz])); 597 val[nz++] = v1[j]; 598 } 599 } 600 } 601 } else { 602 nz = 0; 603 val = mumps->val; 604 if (missing) { 605 for (i=0; i <M; i++) { 606 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 607 for (j=ai[i];j<ai[i+1];j++) { 608 if (aj[j] < i) continue; 609 val[nz++] = av[j]; 610 } 611 } else { 612 rnz = ai[i+1] - adiag[i]; 613 v1 = av + adiag[i]; 614 for (j=0; j<rnz; j++) { 615 val[nz++] = v1[j]; 616 } 617 } 618 } 619 } else { 620 for (i=0; i <M; i++) { 621 rnz = ai[i+1] - adiag[i]; 622 v1 = av + adiag[i]; 623 for (j=0; j<rnz; j++) { 624 val[nz++] = v1[j]; 625 } 626 } 627 } 628 } 629 PetscCall(MatSeqAIJRestoreArrayRead(A,&av)); 630 PetscFunctionReturn(0); 631 } 632 633 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 634 { 635 const PetscInt *ai,*aj,*bi,*bj,*garray,*ajj,*bjj; 636 PetscInt bs; 637 PetscInt64 rstart,nz,i,j,k,m,jj,irow,countA,countB; 638 PetscMUMPSInt *row,*col; 639 const PetscScalar *av,*bv,*v1,*v2; 640 PetscScalar *val; 641 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data; 642 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)(mat->A)->data; 643 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 644 const PetscInt bs2=aa->bs2,mbs=aa->mbs; 645 #if defined(PETSC_USE_COMPLEX) 646 PetscBool hermitian,isset; 647 #endif 648 649 PetscFunctionBegin; 650 #if defined(PETSC_USE_COMPLEX) 651 PetscCall(MatIsHermitianKnown(A,&isset,&hermitian)); 652 PetscCheck(!isset || !hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 653 #endif 654 PetscCall(MatGetBlockSize(A,&bs)); 655 rstart = A->rmap->rstart; 656 ai = aa->i; 657 aj = aa->j; 658 bi = bb->i; 659 bj = bb->j; 660 av = aa->a; 661 bv = bb->a; 662 663 garray = mat->garray; 664 665 if (reuse == MAT_INITIAL_MATRIX) { 666 nz = (aa->nz+bb->nz)*bs2; /* just a conservative estimate */ 667 PetscCall(PetscMalloc2(nz,&row,nz,&col)); 668 PetscCall(PetscMalloc1(nz,&val)); 669 /* can not decide the exact mumps->nnz now because of the SBAIJ */ 670 mumps->irn = row; 671 mumps->jcn = col; 672 mumps->val = mumps->val_alloc = val; 673 } else { 674 val = mumps->val; 675 } 676 677 jj = 0; irow = rstart; 678 for (i=0; i<mbs; i++) { 679 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 680 countA = ai[i+1] - ai[i]; 681 countB = bi[i+1] - bi[i]; 682 bjj = bj + bi[i]; 683 v1 = av + ai[i]*bs2; 684 v2 = bv + bi[i]*bs2; 685 686 if (bs>1) { 687 /* A-part */ 688 for (j=0; j<countA; j++) { 689 for (k=0; k<bs; k++) { 690 for (m=0; m<bs; m++) { 691 if (rstart + ajj[j]*bs>irow || k>=m) { 692 if (reuse == MAT_INITIAL_MATRIX) { 693 PetscCall(PetscMUMPSIntCast(irow + m + shift,&row[jj])); 694 PetscCall(PetscMUMPSIntCast(rstart + ajj[j]*bs + k + shift,&col[jj])); 695 } 696 val[jj++] = v1[j*bs2 + m + k*bs]; 697 } 698 } 699 } 700 } 701 702 /* B-part */ 703 for (j=0; j < countB; j++) { 704 for (k=0; k<bs; k++) { 705 for (m=0; m<bs; m++) { 706 if (reuse == MAT_INITIAL_MATRIX) { 707 PetscCall(PetscMUMPSIntCast(irow + m + shift,&row[jj])); 708 PetscCall(PetscMUMPSIntCast(garray[bjj[j]]*bs + k + shift,&col[jj])); 709 } 710 val[jj++] = v2[j*bs2 + m + k*bs]; 711 } 712 } 713 } 714 } else { 715 /* A-part */ 716 for (j=0; j<countA; j++) { 717 if (reuse == MAT_INITIAL_MATRIX) { 718 PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj])); 719 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj])); 720 } 721 val[jj++] = v1[j]; 722 } 723 724 /* B-part */ 725 for (j=0; j < countB; j++) { 726 if (reuse == MAT_INITIAL_MATRIX) { 727 
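            /* garray[] translates the local column index of the off-diagonal (B) part into the
               global column index expected by MUMPS */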
PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj])); 728 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj])); 729 } 730 val[jj++] = v2[j]; 731 } 732 } 733 irow+=bs; 734 } 735 mumps->nnz = jj; 736 PetscFunctionReturn(0); 737 } 738 739 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 740 { 741 const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 742 PetscInt64 rstart,nz,i,j,jj,irow,countA,countB; 743 PetscMUMPSInt *row,*col; 744 const PetscScalar *av, *bv,*v1,*v2; 745 PetscScalar *val; 746 Mat Ad,Ao; 747 Mat_SeqAIJ *aa; 748 Mat_SeqAIJ *bb; 749 750 PetscFunctionBegin; 751 PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray)); 752 PetscCall(MatSeqAIJGetArrayRead(Ad,&av)); 753 PetscCall(MatSeqAIJGetArrayRead(Ao,&bv)); 754 755 aa = (Mat_SeqAIJ*)(Ad)->data; 756 bb = (Mat_SeqAIJ*)(Ao)->data; 757 ai = aa->i; 758 aj = aa->j; 759 bi = bb->i; 760 bj = bb->j; 761 762 rstart = A->rmap->rstart; 763 764 if (reuse == MAT_INITIAL_MATRIX) { 765 nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */ 766 PetscCall(PetscMalloc2(nz,&row,nz,&col)); 767 PetscCall(PetscMalloc1(nz,&val)); 768 mumps->nnz = nz; 769 mumps->irn = row; 770 mumps->jcn = col; 771 mumps->val = mumps->val_alloc = val; 772 } else { 773 val = mumps->val; 774 } 775 776 jj = 0; irow = rstart; 777 for (i=0; i<m; i++) { 778 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 779 countA = ai[i+1] - ai[i]; 780 countB = bi[i+1] - bi[i]; 781 bjj = bj + bi[i]; 782 v1 = av + ai[i]; 783 v2 = bv + bi[i]; 784 785 /* A-part */ 786 for (j=0; j<countA; j++) { 787 if (reuse == MAT_INITIAL_MATRIX) { 788 PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj])); 789 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj])); 790 } 791 val[jj++] = v1[j]; 792 } 793 794 /* B-part */ 795 for (j=0; j < countB; j++) { 796 if (reuse == MAT_INITIAL_MATRIX) { 797 PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj])); 798 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj])); 799 } 800 val[jj++] = v2[j]; 801 } 802 irow++; 803 } 804 PetscCall(MatSeqAIJRestoreArrayRead(Ad,&av)); 805 PetscCall(MatSeqAIJRestoreArrayRead(Ao,&bv)); 806 PetscFunctionReturn(0); 807 } 808 809 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 810 { 811 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)A->data; 812 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ*)(mat->A)->data; 813 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 814 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj; 815 const PetscInt *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart; 816 const PetscInt bs2=mat->bs2; 817 PetscInt bs; 818 PetscInt64 nz,i,j,k,n,jj,irow,countA,countB,idx; 819 PetscMUMPSInt *row,*col; 820 const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2; 821 PetscScalar *val; 822 823 PetscFunctionBegin; 824 PetscCall(MatGetBlockSize(A,&bs)); 825 if (reuse == MAT_INITIAL_MATRIX) { 826 nz = bs2*(aa->nz + bb->nz); 827 PetscCall(PetscMalloc2(nz,&row,nz,&col)); 828 PetscCall(PetscMalloc1(nz,&val)); 829 mumps->nnz = nz; 830 mumps->irn = row; 831 mumps->jcn = col; 832 mumps->val = mumps->val_alloc = val; 833 } else { 834 val = mumps->val; 835 } 836 837 jj = 0; irow = rstart; 838 for (i=0; i<mbs; i++) { 839 countA = ai[i+1] - ai[i]; 840 countB = bi[i+1] - bi[i]; 841 ajj = aj + ai[i]; 842 bjj = bj + bi[i]; 843 v1 = av + bs2*ai[i]; 844 v2 = bv + bs2*bi[i]; 845 846 idx = 0; 847 /* A-part */ 848 for (k=0; k<countA; k++) { 849 for (j=0; j<bs; j++) { 850 for (n=0; n<bs; n++) 
{ 851 if (reuse == MAT_INITIAL_MATRIX) { 852 PetscCall(PetscMUMPSIntCast(irow + n + shift,&row[jj])); 853 PetscCall(PetscMUMPSIntCast(rstart + bs*ajj[k] + j + shift,&col[jj])); 854 } 855 val[jj++] = v1[idx++]; 856 } 857 } 858 } 859 860 idx = 0; 861 /* B-part */ 862 for (k=0; k<countB; k++) { 863 for (j=0; j<bs; j++) { 864 for (n=0; n<bs; n++) { 865 if (reuse == MAT_INITIAL_MATRIX) { 866 PetscCall(PetscMUMPSIntCast(irow + n + shift,&row[jj])); 867 PetscCall(PetscMUMPSIntCast(bs*garray[bjj[k]] + j + shift,&col[jj])); 868 } 869 val[jj++] = v2[idx++]; 870 } 871 } 872 } 873 irow += bs; 874 } 875 PetscFunctionReturn(0); 876 } 877 878 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 879 { 880 const PetscInt *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 881 PetscInt64 rstart,nz,nza,nzb,i,j,jj,irow,countA,countB; 882 PetscMUMPSInt *row,*col; 883 const PetscScalar *av, *bv,*v1,*v2; 884 PetscScalar *val; 885 Mat Ad,Ao; 886 Mat_SeqAIJ *aa; 887 Mat_SeqAIJ *bb; 888 #if defined(PETSC_USE_COMPLEX) 889 PetscBool hermitian,isset; 890 #endif 891 892 PetscFunctionBegin; 893 #if defined(PETSC_USE_COMPLEX) 894 PetscCall(MatIsHermitianKnown(A,&isset,&hermitian)); 895 PetscCheck(!isset || !hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 896 #endif 897 PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray)); 898 PetscCall(MatSeqAIJGetArrayRead(Ad,&av)); 899 PetscCall(MatSeqAIJGetArrayRead(Ao,&bv)); 900 901 aa = (Mat_SeqAIJ*)(Ad)->data; 902 bb = (Mat_SeqAIJ*)(Ao)->data; 903 ai = aa->i; 904 aj = aa->j; 905 adiag = aa->diag; 906 bi = bb->i; 907 bj = bb->j; 908 909 rstart = A->rmap->rstart; 910 911 if (reuse == MAT_INITIAL_MATRIX) { 912 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 913 nzb = 0; /* num of upper triangular entries in mat->B */ 914 for (i=0; i<m; i++) { 915 nza += (ai[i+1] - adiag[i]); 916 countB = bi[i+1] - bi[i]; 917 bjj = bj + bi[i]; 918 for (j=0; j<countB; j++) { 919 if (garray[bjj[j]] > rstart) nzb++; 920 } 921 } 922 923 nz = nza + nzb; /* total nz of upper triangular part of mat */ 924 PetscCall(PetscMalloc2(nz,&row,nz,&col)); 925 PetscCall(PetscMalloc1(nz,&val)); 926 mumps->nnz = nz; 927 mumps->irn = row; 928 mumps->jcn = col; 929 mumps->val = mumps->val_alloc = val; 930 } else { 931 val = mumps->val; 932 } 933 934 jj = 0; irow = rstart; 935 for (i=0; i<m; i++) { 936 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 937 v1 = av + adiag[i]; 938 countA = ai[i+1] - adiag[i]; 939 countB = bi[i+1] - bi[i]; 940 bjj = bj + bi[i]; 941 v2 = bv + bi[i]; 942 943 /* A-part */ 944 for (j=0; j<countA; j++) { 945 if (reuse == MAT_INITIAL_MATRIX) { 946 PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj])); 947 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj])); 948 } 949 val[jj++] = v1[j]; 950 } 951 952 /* B-part */ 953 for (j=0; j < countB; j++) { 954 if (garray[bjj[j]] > rstart) { 955 if (reuse == MAT_INITIAL_MATRIX) { 956 PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj])); 957 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj])); 958 } 959 val[jj++] = v2[j]; 960 } 961 } 962 irow++; 963 } 964 PetscCall(MatSeqAIJRestoreArrayRead(Ad,&av)); 965 PetscCall(MatSeqAIJRestoreArrayRead(Ao,&bv)); 966 PetscFunctionReturn(0); 967 } 968 969 PetscErrorCode MatDestroy_MUMPS(Mat A) 970 { 971 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 972 973 PetscFunctionBegin; 974 
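  /* Free the PETSc-side work objects first; then, if JOB_INIT was ever issued for this instance,
     terminate it with JOB_END and give back the MUMPS communicator; finally clear the composed
     MatMumps* query/set functions from the factor matrix */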
PetscCall(PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc)); 975 PetscCall(VecScatterDestroy(&mumps->scat_rhs)); 976 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 977 PetscCall(VecDestroy(&mumps->b_seq)); 978 PetscCall(VecDestroy(&mumps->x_seq)); 979 PetscCall(PetscFree(mumps->id.perm_in)); 980 PetscCall(PetscFree2(mumps->irn,mumps->jcn)); 981 PetscCall(PetscFree(mumps->val_alloc)); 982 PetscCall(PetscFree(mumps->info)); 983 PetscCall(PetscFree(mumps->ICNTL_pre)); 984 PetscCall(PetscFree(mumps->CNTL_pre)); 985 PetscCall(MatMumpsResetSchur_Private(mumps)); 986 if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */ 987 mumps->id.job = JOB_END; 988 PetscMUMPS_c(mumps); 989 PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d",mumps->id.INFOG(1)); 990 if (mumps->mumps_comm != MPI_COMM_NULL) { 991 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm)); 992 else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A),&mumps->mumps_comm)); 993 } 994 } 995 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 996 if (mumps->use_petsc_omp_support) { 997 PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl)); 998 PetscCall(PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf)); 999 PetscCall(PetscFree3(mumps->rhs_nrow,mumps->rhs_recvcounts,mumps->rhs_disps)); 1000 } 1001 #endif 1002 PetscCall(PetscFree(mumps->ia_alloc)); 1003 PetscCall(PetscFree(mumps->ja_alloc)); 1004 PetscCall(PetscFree(mumps->recvcount)); 1005 PetscCall(PetscFree(mumps->reqs)); 1006 PetscCall(PetscFree(mumps->irhs_loc)); 1007 PetscCall(PetscFree(A->data)); 1008 1009 /* clear composed functions */ 1010 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL)); 1011 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL)); 1012 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL)); 1013 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL)); 1014 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL)); 1015 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL)); 1016 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL)); 1017 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL)); 1018 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL)); 1019 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL)); 1020 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL)); 1021 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL)); 1022 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL)); 1023 PetscFunctionReturn(0); 1024 } 1025 1026 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. 
*/ 1027 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A,PetscInt nrhs,const PetscScalar *array) 1028 { 1029 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1030 const PetscMPIInt ompsize=mumps->omp_comm_size; 1031 PetscInt i,m,M,rstart; 1032 1033 PetscFunctionBegin; 1034 PetscCall(MatGetSize(A,&M,NULL)); 1035 PetscCall(MatGetLocalSize(A,&m,NULL)); 1036 PetscCheck(M <= PETSC_MUMPS_INT_MAX,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt"); 1037 if (ompsize == 1) { 1038 if (!mumps->irhs_loc) { 1039 mumps->nloc_rhs = m; 1040 PetscCall(PetscMalloc1(m,&mumps->irhs_loc)); 1041 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 1042 for (i=0; i<m; i++) mumps->irhs_loc[i] = rstart+i+1; /* use 1-based indices */ 1043 } 1044 mumps->id.rhs_loc = (MumpsScalar*)array; 1045 } else { 1046 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1047 const PetscInt *ranges; 1048 PetscMPIInt j,k,sendcount,*petsc_ranks,*omp_ranks; 1049 MPI_Group petsc_group,omp_group; 1050 PetscScalar *recvbuf=NULL; 1051 1052 if (mumps->is_omp_master) { 1053 /* Lazily initialize the omp stuff for distributed rhs */ 1054 if (!mumps->irhs_loc) { 1055 PetscCall(PetscMalloc2(ompsize,&omp_ranks,ompsize,&petsc_ranks)); 1056 PetscCall(PetscMalloc3(ompsize,&mumps->rhs_nrow,ompsize,&mumps->rhs_recvcounts,ompsize,&mumps->rhs_disps)); 1057 PetscCallMPI(MPI_Comm_group(mumps->petsc_comm,&petsc_group)); 1058 PetscCallMPI(MPI_Comm_group(mumps->omp_comm,&omp_group)); 1059 for (j=0; j<ompsize; j++) omp_ranks[j] = j; 1060 PetscCallMPI(MPI_Group_translate_ranks(omp_group,ompsize,omp_ranks,petsc_group,petsc_ranks)); 1061 1062 /* Populate mumps->irhs_loc[], rhs_nrow[] */ 1063 mumps->nloc_rhs = 0; 1064 PetscCall(MatGetOwnershipRanges(A,&ranges)); 1065 for (j=0; j<ompsize; j++) { 1066 mumps->rhs_nrow[j] = ranges[petsc_ranks[j]+1] - ranges[petsc_ranks[j]]; 1067 mumps->nloc_rhs += mumps->rhs_nrow[j]; 1068 } 1069 PetscCall(PetscMalloc1(mumps->nloc_rhs,&mumps->irhs_loc)); 1070 for (j=k=0; j<ompsize; j++) { 1071 for (i=ranges[petsc_ranks[j]]; i<ranges[petsc_ranks[j]+1]; i++,k++) mumps->irhs_loc[k] = i+1; /* uses 1-based indices */ 1072 } 1073 1074 PetscCall(PetscFree2(omp_ranks,petsc_ranks)); 1075 PetscCallMPI(MPI_Group_free(&petsc_group)); 1076 PetscCallMPI(MPI_Group_free(&omp_group)); 1077 } 1078 1079 /* Realloc buffers when current nrhs is bigger than what we have met */ 1080 if (nrhs > mumps->max_nrhs) { 1081 PetscCall(PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf)); 1082 PetscCall(PetscMalloc2(mumps->nloc_rhs*nrhs,&mumps->rhs_loc,mumps->nloc_rhs*nrhs,&mumps->rhs_recvbuf)); 1083 mumps->max_nrhs = nrhs; 1084 } 1085 1086 /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */ 1087 for (j=0; j<ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j]*nrhs,&mumps->rhs_recvcounts[j])); 1088 mumps->rhs_disps[0] = 0; 1089 for (j=1; j<ompsize; j++) { 1090 mumps->rhs_disps[j] = mumps->rhs_disps[j-1] + mumps->rhs_recvcounts[j-1]; 1091 PetscCheck(mumps->rhs_disps[j] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscMPIInt overflow!"); 1092 } 1093 recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. 
Single rhs is common in Ax=b */ 1094 } 1095 1096 PetscCall(PetscMPIIntCast(m*nrhs,&sendcount)); 1097 PetscCallMPI(MPI_Gatherv(array,sendcount,MPIU_SCALAR,recvbuf,mumps->rhs_recvcounts,mumps->rhs_disps,MPIU_SCALAR,0,mumps->omp_comm)); 1098 1099 if (mumps->is_omp_master) { 1100 if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */ 1101 PetscScalar *dst,*dstbase = mumps->rhs_loc; 1102 for (j=0; j<ompsize; j++) { 1103 const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j]; 1104 dst = dstbase; 1105 for (i=0; i<nrhs; i++) { 1106 PetscCall(PetscArraycpy(dst,src,mumps->rhs_nrow[j])); 1107 src += mumps->rhs_nrow[j]; 1108 dst += mumps->nloc_rhs; 1109 } 1110 dstbase += mumps->rhs_nrow[j]; 1111 } 1112 } 1113 mumps->id.rhs_loc = (MumpsScalar*)mumps->rhs_loc; 1114 } 1115 #endif /* PETSC_HAVE_OPENMP_SUPPORT */ 1116 } 1117 mumps->id.nrhs = nrhs; 1118 mumps->id.nloc_rhs = mumps->nloc_rhs; 1119 mumps->id.lrhs_loc = mumps->nloc_rhs; 1120 mumps->id.irhs_loc = mumps->irhs_loc; 1121 PetscFunctionReturn(0); 1122 } 1123 1124 PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x) 1125 { 1126 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1127 const PetscScalar *rarray = NULL; 1128 PetscScalar *array; 1129 IS is_iden,is_petsc; 1130 PetscInt i; 1131 PetscBool second_solve = PETSC_FALSE; 1132 static PetscBool cite1 = PETSC_FALSE,cite2 = PETSC_FALSE; 1133 1134 PetscFunctionBegin; 1135 PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1)); 1136 PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",&cite2)); 1137 1138 if (A->factorerrortype) { 1139 PetscCall(PetscInfo(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2))); 1140 PetscCall(VecSetInf(x)); 1141 PetscFunctionReturn(0); 1142 } 1143 1144 mumps->id.nrhs = 1; 1145 if (mumps->petsc_size > 1) { 1146 if (mumps->ICNTL20 == 10) { 1147 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1148 PetscCall(VecGetArrayRead(b,&rarray)); 1149 PetscCall(MatMumpsSetUpDistRHSInfo(A,1,rarray)); 1150 } else { 1151 mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/ 1152 PetscCall(VecScatterBegin(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD)); 1153 PetscCall(VecScatterEnd(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD)); 1154 if (!mumps->myid) { 1155 PetscCall(VecGetArray(mumps->b_seq,&array)); 1156 mumps->id.rhs = (MumpsScalar*)array; 1157 } 1158 } 1159 } else { /* petsc_size == 1 */ 1160 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1161 PetscCall(VecCopy(b,x)); 1162 PetscCall(VecGetArray(x,&array)); 1163 mumps->id.rhs = (MumpsScalar*)array; 1164 } 1165 1166 /* 1167 handle condensation step of Schur complement (if any) 1168 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 
1169 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 1170 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 1171 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 1172 */ 1173 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1174 PetscCheck(mumps->petsc_size <= 1,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc"); 1175 second_solve = PETSC_TRUE; 1176 PetscCall(MatMumpsHandleSchur_Private(A,PETSC_FALSE)); 1177 } 1178 /* solve phase */ 1179 /*-------------*/ 1180 mumps->id.job = JOB_SOLVE; 1181 PetscMUMPS_c(mumps); 1182 PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1)); 1183 1184 /* handle expansion step of Schur complement (if any) */ 1185 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A,PETSC_TRUE)); 1186 1187 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 1188 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 1189 /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 1190 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1191 } 1192 if (!mumps->scat_sol) { /* create scatter scat_sol */ 1193 PetscInt *isol2_loc=NULL; 1194 PetscCall(ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden)); /* from */ 1195 PetscCall(PetscMalloc1(mumps->id.lsol_loc,&isol2_loc)); 1196 for (i=0; i<mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i]-1; /* change Fortran style to C style */ 1197 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,isol2_loc,PETSC_OWN_POINTER,&is_petsc)); /* to */ 1198 PetscCall(VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol)); 1199 PetscCall(ISDestroy(&is_iden)); 1200 PetscCall(ISDestroy(&is_petsc)); 1201 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 1202 } 1203 1204 PetscCall(VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD)); 1205 PetscCall(VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD)); 1206 } 1207 1208 if (mumps->petsc_size > 1) { 1209 if (mumps->ICNTL20 == 10) { 1210 PetscCall(VecRestoreArrayRead(b,&rarray)); 1211 } else if (!mumps->myid) { 1212 PetscCall(VecRestoreArray(mumps->b_seq,&array)); 1213 } 1214 } else PetscCall(VecRestoreArray(x,&array)); 1215 1216 PetscCall(PetscLogFlops(2.0*mumps->id.RINFO(3))); 1217 PetscFunctionReturn(0); 1218 } 1219 1220 PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x) 1221 { 1222 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1223 1224 PetscFunctionBegin; 1225 mumps->id.ICNTL(9) = 0; 1226 PetscCall(MatSolve_MUMPS(A,b,x)); 1227 mumps->id.ICNTL(9) = 1; 1228 PetscFunctionReturn(0); 1229 } 1230 1231 PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X) 1232 { 1233 Mat Bt = NULL; 1234 PetscBool denseX,denseB,flg,flgT; 1235 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1236 PetscInt i,nrhs,M; 1237 PetscScalar *array; 1238 const PetscScalar *rbray; 1239 PetscInt lsol_loc,nlsol_loc,*idxx,iidx = 0; 1240 PetscMUMPSInt *isol_loc,*isol_loc_save; 1241 PetscScalar *bray,*sol_loc,*sol_loc_save; 1242 IS is_to,is_from; 1243 PetscInt k,proc,j,m,myrstart; 1244 const PetscInt *rstart; 1245 Vec v_mpi,msol_loc; 1246 VecScatter scat_sol; 1247 Vec b_seq; 1248 VecScatter scat_rhs; 1249 PetscScalar *aa; 
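  /* aa, spnr, ia, ja below hold the CSR data of the sparse RHS (Bt) handed to MUMPS as rhs_sparse/irhs_ptr/irhs_sparse */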
1250 PetscInt spnr,*ia,*ja; 1251 Mat_MPIAIJ *b = NULL; 1252 1253 PetscFunctionBegin; 1254 PetscCall(PetscObjectTypeCompareAny((PetscObject)X,&denseX,MATSEQDENSE,MATMPIDENSE,NULL)); 1255 PetscCheck(denseX,PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix"); 1256 1257 PetscCall(PetscObjectTypeCompareAny((PetscObject)B,&denseB,MATSEQDENSE,MATMPIDENSE,NULL)); 1258 if (denseB) { 1259 PetscCheck(B->rmap->n == X->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution"); 1260 mumps->id.ICNTL(20)= 0; /* dense RHS */ 1261 } else { /* sparse B */ 1262 PetscCheck(X != B,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_IDN,"X and B must be different matrices"); 1263 PetscCall(PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT)); 1264 if (flgT) { /* input B is transpose of actural RHS matrix, 1265 because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */ 1266 PetscCall(MatTransposeGetMat(B,&Bt)); 1267 } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix"); 1268 mumps->id.ICNTL(20)= 1; /* sparse RHS */ 1269 } 1270 1271 PetscCall(MatGetSize(B,&M,&nrhs)); 1272 mumps->id.nrhs = nrhs; 1273 mumps->id.lrhs = M; 1274 mumps->id.rhs = NULL; 1275 1276 if (mumps->petsc_size == 1) { 1277 PetscScalar *aa; 1278 PetscInt spnr,*ia,*ja; 1279 PetscBool second_solve = PETSC_FALSE; 1280 1281 PetscCall(MatDenseGetArray(X,&array)); 1282 mumps->id.rhs = (MumpsScalar*)array; 1283 1284 if (denseB) { 1285 /* copy B to X */ 1286 PetscCall(MatDenseGetArrayRead(B,&rbray)); 1287 PetscCall(PetscArraycpy(array,rbray,M*nrhs)); 1288 PetscCall(MatDenseRestoreArrayRead(B,&rbray)); 1289 } else { /* sparse B */ 1290 PetscCall(MatSeqAIJGetArray(Bt,&aa)); 1291 PetscCall(MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg)); 1292 PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 1293 PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs)); 1294 mumps->id.rhs_sparse = (MumpsScalar*)aa; 1295 } 1296 /* handle condensation step of Schur complement (if any) */ 1297 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1298 second_solve = PETSC_TRUE; 1299 PetscCall(MatMumpsHandleSchur_Private(A,PETSC_FALSE)); 1300 } 1301 /* solve phase */ 1302 /*-------------*/ 1303 mumps->id.job = JOB_SOLVE; 1304 PetscMUMPS_c(mumps); 1305 PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1)); 1306 1307 /* handle expansion step of Schur complement (if any) */ 1308 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A,PETSC_TRUE)); 1309 if (!denseB) { /* sparse B */ 1310 PetscCall(MatSeqAIJRestoreArray(Bt,&aa)); 1311 PetscCall(MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg)); 1312 PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 1313 } 1314 PetscCall(MatDenseRestoreArray(X,&array)); 1315 PetscFunctionReturn(0); 1316 } 1317 1318 /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! 
--------*/ 1319 PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19),PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc"); 1320 1321 /* create msol_loc to hold mumps local solution */ 1322 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1323 sol_loc_save = (PetscScalar*)mumps->id.sol_loc; 1324 1325 lsol_loc = mumps->id.lsol_loc; 1326 nlsol_loc = nrhs*lsol_loc; /* length of sol_loc */ 1327 PetscCall(PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc)); 1328 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1329 mumps->id.isol_loc = isol_loc; 1330 1331 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc)); 1332 1333 if (denseB) { 1334 if (mumps->ICNTL20 == 10) { 1335 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1336 PetscCall(MatDenseGetArrayRead(B,&rbray)); 1337 PetscCall(MatMumpsSetUpDistRHSInfo(A,nrhs,rbray)); 1338 PetscCall(MatDenseRestoreArrayRead(B,&rbray)); 1339 PetscCall(MatGetLocalSize(B,&m,NULL)); 1340 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,NULL,&v_mpi)); 1341 } else { 1342 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1343 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1344 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1345 0, re-arrange B into desired order, which is a local operation. 1346 */ 1347 1348 /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 1349 /* wrap dense rhs matrix B into a vector v_mpi */ 1350 PetscCall(MatGetLocalSize(B,&m,NULL)); 1351 PetscCall(MatDenseGetArray(B,&bray)); 1352 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi)); 1353 PetscCall(MatDenseRestoreArray(B,&bray)); 1354 1355 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! 
*/ 1356 if (!mumps->myid) { 1357 PetscInt *idx; 1358 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1359 PetscCall(PetscMalloc1(nrhs*M,&idx)); 1360 PetscCall(MatGetOwnershipRanges(B,&rstart)); 1361 k = 0; 1362 for (proc=0; proc<mumps->petsc_size; proc++) { 1363 for (j=0; j<nrhs; j++) { 1364 for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i; 1365 } 1366 } 1367 1368 PetscCall(VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq)); 1369 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to)); 1370 PetscCall(ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from)); 1371 } else { 1372 PetscCall(VecCreateSeq(PETSC_COMM_SELF,0,&b_seq)); 1373 PetscCall(ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to)); 1374 PetscCall(ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from)); 1375 } 1376 PetscCall(VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs)); 1377 PetscCall(VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD)); 1378 PetscCall(ISDestroy(&is_to)); 1379 PetscCall(ISDestroy(&is_from)); 1380 PetscCall(VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD)); 1381 1382 if (!mumps->myid) { /* define rhs on the host */ 1383 PetscCall(VecGetArray(b_seq,&bray)); 1384 mumps->id.rhs = (MumpsScalar*)bray; 1385 PetscCall(VecRestoreArray(b_seq,&bray)); 1386 } 1387 } 1388 } else { /* sparse B */ 1389 b = (Mat_MPIAIJ*)Bt->data; 1390 1391 /* wrap dense X into a vector v_mpi */ 1392 PetscCall(MatGetLocalSize(X,&m,NULL)); 1393 PetscCall(MatDenseGetArray(X,&bray)); 1394 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi)); 1395 PetscCall(MatDenseRestoreArray(X,&bray)); 1396 1397 if (!mumps->myid) { 1398 PetscCall(MatSeqAIJGetArray(b->A,&aa)); 1399 PetscCall(MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg)); 1400 PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 1401 PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs)); 1402 mumps->id.rhs_sparse = (MumpsScalar*)aa; 1403 } else { 1404 mumps->id.irhs_ptr = NULL; 1405 mumps->id.irhs_sparse = NULL; 1406 mumps->id.nz_rhs = 0; 1407 mumps->id.rhs_sparse = NULL; 1408 } 1409 } 1410 1411 /* solve phase */ 1412 /*-------------*/ 1413 mumps->id.job = JOB_SOLVE; 1414 PetscMUMPS_c(mumps); 1415 PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1)); 1416 1417 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1418 PetscCall(MatDenseGetArray(X,&array)); 1419 PetscCall(VecPlaceArray(v_mpi,array)); 1420 1421 /* create scatter scat_sol */ 1422 PetscCall(MatGetOwnershipRanges(X,&rstart)); 1423 /* iidx: index for scatter mumps solution to petsc X */ 1424 1425 PetscCall(ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from)); 1426 PetscCall(PetscMalloc1(nlsol_loc,&idxx)); 1427 for (i=0; i<lsol_loc; i++) { 1428 isol_loc[i] -= 1; /* change Fortran style to C style. 
isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1429 1430 for (proc=0; proc<mumps->petsc_size; proc++) { 1431 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) { 1432 myrstart = rstart[proc]; 1433 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1434 iidx = k + myrstart*nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1435 m = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */ 1436 break; 1437 } 1438 } 1439 1440 for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m; 1441 } 1442 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to)); 1443 PetscCall(VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol)); 1444 PetscCall(VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD)); 1445 PetscCall(ISDestroy(&is_from)); 1446 PetscCall(ISDestroy(&is_to)); 1447 PetscCall(VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD)); 1448 PetscCall(MatDenseRestoreArray(X,&array)); 1449 1450 /* free spaces */ 1451 mumps->id.sol_loc = (MumpsScalar*)sol_loc_save; 1452 mumps->id.isol_loc = isol_loc_save; 1453 1454 PetscCall(PetscFree2(sol_loc,isol_loc)); 1455 PetscCall(PetscFree(idxx)); 1456 PetscCall(VecDestroy(&msol_loc)); 1457 PetscCall(VecDestroy(&v_mpi)); 1458 if (!denseB) { 1459 if (!mumps->myid) { 1460 b = (Mat_MPIAIJ*)Bt->data; 1461 PetscCall(MatSeqAIJRestoreArray(b->A,&aa)); 1462 PetscCall(MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg)); 1463 PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 1464 } 1465 } else { 1466 if (mumps->ICNTL20 == 0) { 1467 PetscCall(VecDestroy(&b_seq)); 1468 PetscCall(VecScatterDestroy(&scat_rhs)); 1469 } 1470 } 1471 PetscCall(VecScatterDestroy(&scat_sol)); 1472 PetscCall(PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3))); 1473 PetscFunctionReturn(0); 1474 } 1475 1476 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X) 1477 { 1478 PetscBool flg; 1479 Mat B; 1480 1481 PetscFunctionBegin; 1482 PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL)); 1483 PetscCheck(flg,PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix"); 1484 1485 /* Create B=Bt^T that uses Bt's data structure */ 1486 PetscCall(MatCreateTranspose(Bt,&B)); 1487 1488 PetscCall(MatMatSolve_MUMPS(A,B,X)); 1489 PetscCall(MatDestroy(&B)); 1490 PetscFunctionReturn(0); 1491 } 1492 1493 #if !defined(PETSC_USE_COMPLEX) 1494 /* 1495 input: 1496 F: numeric factor 1497 output: 1498 nneg: total number of negative pivots 1499 nzero: total number of zero pivots 1500 npos: (global dimension of F) - nneg - nzero 1501 */ 1502 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos) 1503 { 1504 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1505 PetscMPIInt size; 1506 1507 PetscFunctionBegin; 1508 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F),&size)); 1509 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1510 PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. 
-mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia",mumps->id.ICNTL(13));

  if (nneg) *nneg = mumps->id.INFOG(12);
  if (nzero || npos) {
    PetscCheck(mumps->id.ICNTL(24) == 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
    if (nzero) *nzero = mumps->id.INFOG(28);
    if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
  }
  PetscFunctionReturn(0);
}
#endif

PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps)
{
  PetscInt       i,nreqs;
  PetscMUMPSInt  *irn,*jcn;
  PetscMPIInt    count;
  PetscInt64     totnnz,remain;
  const PetscInt osize=mumps->omp_comm_size;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (osize > 1) {
    if (reuse == MAT_INITIAL_MATRIX) {
      /* master first gathers counts of nonzeros to receive */
      if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize,&mumps->recvcount));
      PetscCallMPI(MPI_Gather(&mumps->nnz,1,MPIU_INT64,mumps->recvcount,1,MPIU_INT64,0/*master*/,mumps->omp_comm));

      /* Then each rank computes the number of sends/recvs it needs */
      if (mumps->is_omp_master) {
        /* Start from 1 since self communication is not done in MPI */
        nreqs = 0;
        for (i=1; i<osize; i++) nreqs += (mumps->recvcount[i]+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
      } else {
        nreqs = (mumps->nnz+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
      }
      PetscCall(PetscMalloc1(nreqs*3,&mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */

      /* The following code is doing a very simple thing: the omp_master rank gathers irn/jcn/val from the others.
         MPI_Gatherv would suffice if it supported big counts > 2^31-1. Since it does not, and mumps->nnz
         might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size
         is very small, so the current approach should have no extra overhead compared to MPI_Gatherv.
1552 */ 1553 nreqs = 0; /* counter for actual send/recvs */ 1554 if (mumps->is_omp_master) { 1555 for (i=0,totnnz=0; i<osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1556 PetscCall(PetscMalloc2(totnnz,&irn,totnnz,&jcn)); 1557 PetscCall(PetscMalloc1(totnnz,&val)); 1558 1559 /* Self communication */ 1560 PetscCall(PetscArraycpy(irn,mumps->irn,mumps->nnz)); 1561 PetscCall(PetscArraycpy(jcn,mumps->jcn,mumps->nnz)); 1562 PetscCall(PetscArraycpy(val,mumps->val,mumps->nnz)); 1563 1564 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1565 PetscCall(PetscFree2(mumps->irn,mumps->jcn)); 1566 PetscCall(PetscFree(mumps->val_alloc)); 1567 mumps->nnz = totnnz; 1568 mumps->irn = irn; 1569 mumps->jcn = jcn; 1570 mumps->val = mumps->val_alloc = val; 1571 1572 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1573 jcn += mumps->recvcount[0]; 1574 val += mumps->recvcount[0]; 1575 1576 /* Remote communication */ 1577 for (i=1; i<osize; i++) { 1578 count = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX); 1579 remain = mumps->recvcount[i] - count; 1580 while (count>0) { 1581 PetscCallMPI(MPI_Irecv(irn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1582 PetscCallMPI(MPI_Irecv(jcn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1583 PetscCallMPI(MPI_Irecv(val,count,MPIU_SCALAR, i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1584 irn += count; 1585 jcn += count; 1586 val += count; 1587 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1588 remain -= count; 1589 } 1590 } 1591 } else { 1592 irn = mumps->irn; 1593 jcn = mumps->jcn; 1594 val = mumps->val; 1595 count = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX); 1596 remain = mumps->nnz - count; 1597 while (count>0) { 1598 PetscCallMPI(MPI_Isend(irn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1599 PetscCallMPI(MPI_Isend(jcn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1600 PetscCallMPI(MPI_Isend(val,count,MPIU_SCALAR, 0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1601 irn += count; 1602 jcn += count; 1603 val += count; 1604 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1605 remain -= count; 1606 } 1607 } 1608 } else { 1609 nreqs = 0; 1610 if (mumps->is_omp_master) { 1611 val = mumps->val + mumps->recvcount[0]; 1612 for (i=1; i<osize; i++) { /* Remote communication only since self data is already in place */ 1613 count = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX); 1614 remain = mumps->recvcount[i] - count; 1615 while (count>0) { 1616 PetscCallMPI(MPI_Irecv(val,count,MPIU_SCALAR,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1617 val += count; 1618 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1619 remain -= count; 1620 } 1621 } 1622 } else { 1623 val = mumps->val; 1624 count = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX); 1625 remain = mumps->nnz - count; 1626 while (count>0) { 1627 PetscCallMPI(MPI_Isend(val,count,MPIU_SCALAR,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1628 val += count; 1629 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1630 remain -= count; 1631 } 1632 } 1633 } 1634 PetscCallMPI(MPI_Waitall(nreqs,mumps->reqs,MPI_STATUSES_IGNORE)); 1635 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1636 } 1637 PetscFunctionReturn(0); 1638 } 1639 1640 PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info) 1641 { 1642 Mat_MUMPS *mumps =(Mat_MUMPS*)(F)->data; 1643 PetscBool isMPIAIJ; 1644 1645 
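  /* Overview of the numeric phase below: the (i,j,v) triples are refreshed in place with
     ConvertToTriples(MAT_REUSE_MATRIX) and, when -mat_mumps_use_omp_threads is active, re-gathered
     onto the OpenMP master ranks; the values are then passed to MUMPS as id.a (centralized input,
     ICNTL(18)=0) or id.a_loc (distributed input) and JOB_FACTNUMERIC is run. A negative INFOG(1)
     is mapped to a MatFactorError code instead of raising an error when A->erroriffailure is false. */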
PetscFunctionBegin; 1646 if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) { 1647 if (mumps->id.INFOG(1) == -6) { 1648 PetscCall(PetscInfo(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2))); 1649 } 1650 PetscCall(PetscInfo(A,"MatFactorNumeric is called after the analysis phase failed, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2))); 1651 PetscFunctionReturn(0); 1652 } 1653 1654 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps)); 1655 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps)); 1656 1657 /* numerical factorization phase */ 1658 /*-------------------------------*/ 1659 mumps->id.job = JOB_FACTNUMERIC; 1660 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1661 if (!mumps->myid) { 1662 mumps->id.a = (MumpsScalar*)mumps->val; 1663 } 1664 } else { 1665 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1666 } 1667 PetscMUMPS_c(mumps); 1668 if (mumps->id.INFOG(1) < 0) { 1669 if (A->erroriffailure) { 1670 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d",mumps->id.INFOG(1),mumps->id.INFO(2)); 1671 } else { 1672 if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */ 1673 PetscCall(PetscInfo(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2))); 1674 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1675 } else if (mumps->id.INFOG(1) == -13) { 1676 PetscCall(PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2))); 1677 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1678 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 1679 PetscCall(PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with work array\n",mumps->id.INFOG(1),mumps->id.INFO(2))); 1680 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1681 } else { 1682 PetscCall(PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2))); 1683 F->factorerrortype = MAT_FACTOR_OTHER; 1684 } 1685 } 1686 } 1687 PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0,PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d",mumps->id.ICNTL(16)); 1688 1689 F->assembled = PETSC_TRUE; 1690 1691 if (F->schur) { /* reset Schur status to unfactored */ 1692 #if defined(PETSC_HAVE_CUDA) 1693 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 1694 #endif 1695 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1696 mumps->id.ICNTL(19) = 2; 1697 PetscCall(MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur)); 1698 } 1699 PetscCall(MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED)); 1700 } 1701 1702 /* just to be sure that the ICNTL(19) value returned by a call to MatMumpsGetIcntl is always consistent */ 1703 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 1704 1705 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 1706 if (mumps->petsc_size > 1) { 1707 PetscInt lsol_loc; 1708 PetscScalar *sol_loc; 1709 1710 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ)); 1711 1712 /* distributed solution; Create x_seq=sol_loc for repeated use */ 1713 if (mumps->x_seq) { 1714 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1715 
PetscCall(PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc)); 1716 PetscCall(VecDestroy(&mumps->x_seq)); 1717 } 1718 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1719 PetscCall(PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc)); 1720 mumps->id.lsol_loc = lsol_loc; 1721 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1722 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq)); 1723 } 1724 PetscCall(PetscLogFlops(mumps->id.RINFO(2))); 1725 PetscFunctionReturn(0); 1726 } 1727 1728 /* Sets MUMPS options from the options database */ 1729 PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 1730 { 1731 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1732 PetscMUMPSInt icntl=0,size,*listvar_schur; 1733 PetscInt info[80],i,ninfo=80,rbs,cbs; 1734 PetscBool flg=PETSC_FALSE,schur=(PetscBool)(mumps->id.ICNTL(26) == -1); 1735 MumpsScalar *arr; 1736 1737 PetscFunctionBegin; 1738 PetscOptionsBegin(PetscObjectComm((PetscObject)F),((PetscObject)F)->prefix,"MUMPS Options","Mat"); 1739 if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 1740 PetscInt nthreads = 0; 1741 PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 1742 PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 1743 1744 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1745 PetscCallMPI(MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size)); 1746 PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm,&mumps->myid));/* "if (!myid)" still works even if mumps_comm is different */ 1747 1748 PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads","Convert MPI processes into OpenMP threads","None",&mumps->use_petsc_omp_support)); 1749 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1750 /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 1751 PetscCall(PetscOptionsGetInt(NULL,((PetscObject)F)->prefix,"-mat_mumps_use_omp_threads",&nthreads,NULL)); 1752 if (mumps->use_petsc_omp_support) { 1753 PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT),PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",((PetscObject)F)->prefix?((PetscObject)F)->prefix:""); 1754 PetscCheck(!schur,PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature",((PetscObject)F)->prefix?((PetscObject)F)->prefix:""); 1755 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1756 PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl)); 1757 PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master)); 1758 #endif 1759 } else { 1760 mumps->omp_comm = PETSC_COMM_SELF; 1761 mumps->mumps_comm = mumps->petsc_comm; 1762 mumps->is_omp_master = PETSC_TRUE; 1763 } 1764 PetscCallMPI(MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size)); 1765 mumps->reqs = NULL; 1766 mumps->tag = 0; 1767 1768 if (mumps->mumps_comm != MPI_COMM_NULL) { 1769 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 1770 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. 
*/ 1771 MPI_Comm comm; 1772 PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm,&comm)); 1773 mumps->mumps_comm = comm; 1774 } else PetscCall(PetscCommGetComm(mumps->petsc_comm,&mumps->mumps_comm)); 1775 } 1776 1777 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 1778 mumps->id.job = JOB_INIT; 1779 mumps->id.par = 1; /* host participates in factorization and solve */ 1780 mumps->id.sym = mumps->sym; 1781 1782 size = mumps->id.size_schur; 1783 arr = mumps->id.schur; 1784 listvar_schur = mumps->id.listvar_schur; 1785 PetscMUMPS_c(mumps); 1786 PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS: INFOG(1)=%d",mumps->id.INFOG(1)); 1787 /* restore cached ICNTL and CNTL values */ 1788 for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1+2*icntl]) = mumps->ICNTL_pre[2+2*icntl]; 1789 for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1+2*icntl]) = mumps->CNTL_pre[2+2*icntl]; 1790 PetscCall(PetscFree(mumps->ICNTL_pre)); 1791 PetscCall(PetscFree(mumps->CNTL_pre)); 1792 1793 if (schur) { 1794 mumps->id.size_schur = size; 1795 mumps->id.schur_lld = size; 1796 mumps->id.schur = arr; 1797 mumps->id.listvar_schur = listvar_schur; 1798 if (mumps->petsc_size > 1) { 1799 PetscBool gs; /* gs is false if any rank other than root has non-empty IS */ 1800 1801 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 1802 gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 1803 PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm)); 1804 PetscCheck(gs,PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc"); 1805 } else { 1806 if (F->factortype == MAT_FACTOR_LU) { 1807 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 1808 } else { 1809 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 1810 } 1811 } 1812 mumps->id.ICNTL(26) = -1; 1813 } 1814 1815 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 1816 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 
1817 */ 1818 PetscCallMPI(MPI_Bcast(mumps->id.icntl,40,MPI_INT, 0,mumps->omp_comm)); 1819 PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm)); 1820 1821 mumps->scat_rhs = NULL; 1822 mumps->scat_sol = NULL; 1823 1824 /* set PETSc-MUMPS default options - override MUMPS default */ 1825 mumps->id.ICNTL(3) = 0; 1826 mumps->id.ICNTL(4) = 0; 1827 if (mumps->petsc_size == 1) { 1828 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 1829 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 1830 } else { 1831 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 1832 mumps->id.ICNTL(21) = 1; /* distributed solution */ 1833 } 1834 } 1835 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg)); 1836 if (flg) mumps->id.ICNTL(1) = icntl; 1837 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg)); 1838 if (flg) mumps->id.ICNTL(2) = icntl; 1839 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg)); 1840 if (flg) mumps->id.ICNTL(3) = icntl; 1841 1842 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg)); 1843 if (flg) mumps->id.ICNTL(4) = icntl; 1844 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 1845 1846 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg)); 1847 if (flg) mumps->id.ICNTL(6) = icntl; 1848 1849 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis. 
0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)","None",mumps->id.ICNTL(7),&icntl,&flg)); 1850 if (flg) { 1851 PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 1852 mumps->id.ICNTL(7) = icntl; 1853 } 1854 1855 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL)); 1856 /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 1857 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL)); 1858 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL)); 1859 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL)); 1860 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL)); 1861 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL)); 1862 PetscCall(MatGetBlockSizes(A,&rbs,&cbs)); 1863 if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs; 1864 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15","ICNTL(15): compression of the input matrix resulting from a block format","None",mumps->id.ICNTL(15),&mumps->id.ICNTL(15),&flg)); 1865 if (flg) { 1866 PetscCheck(mumps->id.ICNTL(15) <= 0,PETSC_COMM_SELF,PETSC_ERR_SUP,"Positive -mat_mumps_icntl_15 not handled"); 1867 PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 1868 } 1869 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL)); 1870 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 1871 PetscCall(MatDestroy(&F->schur)); 1872 PetscCall(MatMumpsResetSchur_Private(mumps)); 1873 } 1874 1875 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 1876 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 1877 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 1878 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 1879 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 1880 In short, we could not use distributed RHS with MPICH until v4.0b1. 
1881 */ 1882 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101)) 1883 mumps->ICNTL20 = 0; /* Centralized dense RHS */ 1884 #else 1885 mumps->ICNTL20 = 10; /* Distributed dense RHS */ 1886 #endif 1887 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20","ICNTL(20): give MUMPS centralized (0) or distributed (10) dense right-hand sides","None",mumps->ICNTL20,&mumps->ICNTL20,&flg)); 1888 PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0,PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10",(int)mumps->ICNTL20); 1889 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0) 1890 PetscCheck(!flg || mumps->ICNTL20 != 10,PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=10 is not supported before MUMPS-5.3.0"); 1891 #endif 1892 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use the distributed solution vector */ 1893 1894 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL)); 1895 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL)); 1896 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL)); 1897 if (mumps->id.ICNTL(24)) { 1898 mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */ 1899 } 1900 1901 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL)); 1902 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix is present","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL)); 1903 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27","ICNTL(27): controls the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL)); 1904 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL)); 1905 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL)); 1906 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */ 1907 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL)); 1908 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */ 1909 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL)); 1910 
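  /* A minimal sketch of how the determinant requested via -mat_mumps_icntl_33 1 can be read back
     after factorization (variable names below are illustrative only; F is the factored matrix):

       PetscReal rdet,idet;  // RINFOG(12)/RINFOG(13): real/imaginary mantissa of the determinant
       PetscInt  e;          // INFOG(34): exponent, i.e. determinant = (rdet,idet)*2^e
       PetscCall(MatMumpsGetRinfog(F,12,&rdet));
       PetscCall(MatMumpsGetRinfog(F,13,&idet));
       PetscCall(MatMumpsGetInfog(F,34,&e));
  */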
PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL)); 1911 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL)); 1912 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL)); 1913 1914 PetscCall(PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL)); 1915 PetscCall(PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL)); 1916 PetscCall(PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL)); 1917 PetscCall(PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL)); 1918 PetscCall(PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL)); 1919 PetscCall(PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL)); 1920 1921 PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL)); 1922 1923 PetscCall(PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL)); 1924 if (ninfo) { 1925 PetscCheck(ninfo <= 80,PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %" PetscInt_FMT " must be <= 80",ninfo); 1926 PetscCall(PetscMalloc1(ninfo,&mumps->info)); 1927 mumps->ninfo = ninfo; 1928 for (i=0; i<ninfo; i++) { 1929 PetscCheck(info[i] >= 1 && info[i] <= 80,PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %" PetscInt_FMT " must be between 1 and 80",info[i]); 1930 mumps->info[i] = info[i]; 1931 } 1932 } 1933 PetscOptionsEnd(); 1934 PetscFunctionReturn(0); 1935 } 1936 1937 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps) 1938 { 1939 PetscFunctionBegin; 1940 if (mumps->id.INFOG(1) < 0) { 1941 if (A->erroriffailure) { 1942 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d",mumps->id.INFOG(1)); 1943 } else { 1944 if (mumps->id.INFOG(1) == -6) { 1945 PetscCall(PetscInfo(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2))); 1946 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 1947 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 1948 PetscCall(PetscInfo(F,"problem with workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2))); 1949 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1950 } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) { 1951 PetscCall(PetscInfo(F,"Empty matrix\n")); 1952 } else { 1953 PetscCall(PetscInfo(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2))); 1954 F->factorerrortype = MAT_FACTOR_OTHER; 1955 } 1956 } 1957 } 1958 PetscFunctionReturn(0); 1959 } 1960 1961 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1962 { 1963 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1964 Vec b; 1965 const PetscInt M = 
A->rmap->N; 1966 1967 PetscFunctionBegin; 1968 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 1969 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 1970 PetscFunctionReturn(0); 1971 } 1972 1973 /* Set MUMPS options from the options database */ 1974 PetscCall(MatSetFromOptions_MUMPS(F,A)); 1975 1976 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 1977 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps)); 1978 1979 /* analysis phase */ 1980 /*----------------*/ 1981 mumps->id.job = JOB_FACTSYMBOLIC; 1982 mumps->id.n = M; 1983 switch (mumps->id.ICNTL(18)) { 1984 case 0: /* centralized assembled matrix input */ 1985 if (!mumps->myid) { 1986 mumps->id.nnz = mumps->nnz; 1987 mumps->id.irn = mumps->irn; 1988 mumps->id.jcn = mumps->jcn; 1989 if (mumps->id.ICNTL(6)>1) mumps->id.a = (MumpsScalar*)mumps->val; 1990 if (r) { 1991 mumps->id.ICNTL(7) = 1; 1992 if (!mumps->myid) { 1993 const PetscInt *idx; 1994 PetscInt i; 1995 1996 PetscCall(PetscMalloc1(M,&mumps->id.perm_in)); 1997 PetscCall(ISGetIndices(r,&idx)); 1998 for (i=0; i<M; i++) PetscCall(PetscMUMPSIntCast(idx[i]+1,&(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */ 1999 PetscCall(ISRestoreIndices(r,&idx)); 2000 } 2001 } 2002 } 2003 break; 2004 case 3: /* distributed assembled matrix input (size>1) */ 2005 mumps->id.nnz_loc = mumps->nnz; 2006 mumps->id.irn_loc = mumps->irn; 2007 mumps->id.jcn_loc = mumps->jcn; 2008 if (mumps->id.ICNTL(6)>1) mumps->id.a_loc = (MumpsScalar*)mumps->val; 2009 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2010 PetscCall(MatCreateVecs(A,NULL,&b)); 2011 PetscCall(VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq)); 2012 PetscCall(VecDestroy(&b)); 2013 } 2014 break; 2015 } 2016 PetscMUMPS_c(mumps); 2017 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps)); 2018 2019 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2020 F->ops->solve = MatSolve_MUMPS; 2021 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2022 F->ops->matsolve = MatMatSolve_MUMPS; 2023 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2024 2025 mumps->matstruc = SAME_NONZERO_PATTERN; 2026 PetscFunctionReturn(0); 2027 } 2028 2029 /* Note the Petsc r and c permutations are ignored */ 2030 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 2031 { 2032 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 2033 Vec b; 2034 const PetscInt M = A->rmap->N; 2035 2036 PetscFunctionBegin; 2037 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2038 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2039 PetscFunctionReturn(0); 2040 } 2041 2042 /* Set MUMPS options from the options database */ 2043 PetscCall(MatSetFromOptions_MUMPS(F,A)); 2044 2045 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2046 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps)); 2047 2048 /* analysis phase */ 2049 /*----------------*/ 2050 mumps->id.job = JOB_FACTSYMBOLIC; 2051 mumps->id.n = M; 2052 switch (mumps->id.ICNTL(18)) { 2053 case 0: /* centralized assembled matrix input */ 2054 if (!mumps->myid) { 2055 mumps->id.nnz = mumps->nnz; 2056 mumps->id.irn = mumps->irn; 2057 mumps->id.jcn = mumps->jcn; 2058 if (mumps->id.ICNTL(6)>1) { 2059 mumps->id.a = (MumpsScalar*)mumps->val; 2060 } 2061 } 2062 break; 2063 case 3: /* distributed assembled matrix input (size>1) */ 2064 mumps->id.nnz_loc = mumps->nnz; 2065 mumps->id.irn_loc = mumps->irn; 2066 
mumps->id.jcn_loc = mumps->jcn; 2067 if (mumps->id.ICNTL(6)>1) { 2068 mumps->id.a_loc = (MumpsScalar*)mumps->val; 2069 } 2070 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2071 PetscCall(MatCreateVecs(A,NULL,&b)); 2072 PetscCall(VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq)); 2073 PetscCall(VecDestroy(&b)); 2074 } 2075 break; 2076 } 2077 PetscMUMPS_c(mumps); 2078 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps)); 2079 2080 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2081 F->ops->solve = MatSolve_MUMPS; 2082 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2083 2084 mumps->matstruc = SAME_NONZERO_PATTERN; 2085 PetscFunctionReturn(0); 2086 } 2087 2088 /* Note the Petsc r permutation and factor info are ignored */ 2089 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info) 2090 { 2091 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 2092 Vec b; 2093 const PetscInt M = A->rmap->N; 2094 2095 PetscFunctionBegin; 2096 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2097 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2098 PetscFunctionReturn(0); 2099 } 2100 2101 /* Set MUMPS options from the options database */ 2102 PetscCall(MatSetFromOptions_MUMPS(F,A)); 2103 2104 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2105 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps)); 2106 2107 /* analysis phase */ 2108 /*----------------*/ 2109 mumps->id.job = JOB_FACTSYMBOLIC; 2110 mumps->id.n = M; 2111 switch (mumps->id.ICNTL(18)) { 2112 case 0: /* centralized assembled matrix input */ 2113 if (!mumps->myid) { 2114 mumps->id.nnz = mumps->nnz; 2115 mumps->id.irn = mumps->irn; 2116 mumps->id.jcn = mumps->jcn; 2117 if (mumps->id.ICNTL(6)>1) { 2118 mumps->id.a = (MumpsScalar*)mumps->val; 2119 } 2120 } 2121 break; 2122 case 3: /* distributed assembled matrix input (size>1) */ 2123 mumps->id.nnz_loc = mumps->nnz; 2124 mumps->id.irn_loc = mumps->irn; 2125 mumps->id.jcn_loc = mumps->jcn; 2126 if (mumps->id.ICNTL(6)>1) { 2127 mumps->id.a_loc = (MumpsScalar*)mumps->val; 2128 } 2129 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2130 PetscCall(MatCreateVecs(A,NULL,&b)); 2131 PetscCall(VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq)); 2132 PetscCall(VecDestroy(&b)); 2133 } 2134 break; 2135 } 2136 PetscMUMPS_c(mumps); 2137 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps)); 2138 2139 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2140 F->ops->solve = MatSolve_MUMPS; 2141 F->ops->solvetranspose = MatSolve_MUMPS; 2142 F->ops->matsolve = MatMatSolve_MUMPS; 2143 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2144 #if defined(PETSC_USE_COMPLEX) 2145 F->ops->getinertia = NULL; 2146 #else 2147 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2148 #endif 2149 2150 mumps->matstruc = SAME_NONZERO_PATTERN; 2151 PetscFunctionReturn(0); 2152 } 2153 2154 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) 2155 { 2156 PetscBool iascii; 2157 PetscViewerFormat format; 2158 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 2159 2160 PetscFunctionBegin; 2161 /* check if matrix is mumps type */ 2162 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0); 2163 2164 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 2165 if (iascii) { 2166 PetscCall(PetscViewerGetFormat(viewer,&format)); 2167 if (format == PETSC_VIEWER_ASCII_INFO) { 2168 PetscCall(PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n")); 2169 PetscCall(PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d\n",mumps->id.sym)); 2170 PetscCall(PetscViewerASCIIPrintf(viewer," PAR (host participation): %d\n",mumps->id.par)); 2171 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d\n",mumps->id.ICNTL(1))); 2172 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d\n",mumps->id.ICNTL(2))); 2173 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d\n",mumps->id.ICNTL(3))); 2174 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d\n",mumps->id.ICNTL(4))); 2175 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d\n",mumps->id.ICNTL(5))); 2176 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d\n",mumps->id.ICNTL(6))); 2177 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d\n",mumps->id.ICNTL(7))); 2178 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d\n",mumps->id.ICNTL(8))); 2179 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d\n",mumps->id.ICNTL(10))); 2180 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d\n",mumps->id.ICNTL(11))); 2181 if (mumps->id.ICNTL(11)>0) { 2182 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4))); 2183 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5))); 2184 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6))); 2185 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8))); 2186 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9))); 2187 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11))); 2188 } 2189 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d\n",mumps->id.ICNTL(12))); 2190 
PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(13) (sequential factorization of the root node): %d\n",mumps->id.ICNTL(13))); 2191 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d\n",mumps->id.ICNTL(14))); 2192 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(15) (compression of the input matrix): %d\n",mumps->id.ICNTL(15))); 2193 /* ICNTL(15-17) not used */ 2194 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d\n",mumps->id.ICNTL(18))); 2195 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d\n",mumps->id.ICNTL(19))); 2196 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(20) (RHS sparse pattern): %d\n",mumps->id.ICNTL(20))); 2197 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d\n",mumps->id.ICNTL(21))); 2198 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d\n",mumps->id.ICNTL(22))); 2199 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory can be allocated locally):%d\n",mumps->id.ICNTL(23))); 2200 2201 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d\n",mumps->id.ICNTL(24))); 2202 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d\n",mumps->id.ICNTL(25))); 2203 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for RHS or solution): %d\n",mumps->id.ICNTL(26))); 2204 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(27) (blocking size for multiple RHS): %d\n",mumps->id.ICNTL(27))); 2205 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d\n",mumps->id.ICNTL(28))); 2206 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d\n",mumps->id.ICNTL(29))); 2207 2208 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d\n",mumps->id.ICNTL(30))); 2209 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(31) (factors is discarded in the solve phase): %d\n",mumps->id.ICNTL(31))); 2210 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d\n",mumps->id.ICNTL(33))); 2211 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d\n",mumps->id.ICNTL(35))); 2212 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(36) (choice of BLR factorization variant): %d\n",mumps->id.ICNTL(36))); 2213 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(38) (estimated compression rate of LU factors): %d\n",mumps->id.ICNTL(38))); 2214 2215 PetscCall(PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1))); 2216 PetscCall(PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2))); 2217 PetscCall(PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3))); 2218 PetscCall(PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4))); 2219 PetscCall(PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5))); 2220 PetscCall(PetscViewerASCIIPrintf(viewer," CNTL(7) (dropping parameter for BLR): %g \n",mumps->id.CNTL(7))); 2221 2222 /* information local to each processor */ 2223 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n")); 2224 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 2225 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g 
\n",mumps->myid,mumps->id.RINFO(1))); 2226 PetscCall(PetscViewerFlush(viewer)); 2227 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n")); 2228 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(2))); 2229 PetscCall(PetscViewerFlush(viewer)); 2230 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n")); 2231 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(3))); 2232 PetscCall(PetscViewerFlush(viewer)); 2233 2234 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n")); 2235 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(15))); 2236 PetscCall(PetscViewerFlush(viewer)); 2237 2238 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n")); 2239 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(16))); 2240 PetscCall(PetscViewerFlush(viewer)); 2241 2242 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n")); 2243 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(23))); 2244 PetscCall(PetscViewerFlush(viewer)); 2245 2246 if (mumps->ninfo && mumps->ninfo <= 80) { 2247 PetscInt i; 2248 for (i=0; i<mumps->ninfo; i++) { 2249 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "): \n",mumps->info[i])); 2250 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(mumps->info[i]))); 2251 PetscCall(PetscViewerFlush(viewer)); 2252 } 2253 } 2254 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 2255 2256 if (!mumps->myid) { /* information from the host */ 2257 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1))); 2258 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2))); 2259 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3))); 2260 PetscCall(PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34))); 2261 2262 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(3))); 2263 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(4))); 2264 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d\n",mumps->id.INFOG(5))); 2265 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d\n",mumps->id.INFOG(6))); 2266 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively used after analysis): %d\n",mumps->id.INFOG(7))); 2267 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n",mumps->id.INFOG(8))); 2268 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(9) 
(total real/complex workspace to store the matrix factors after factorization): %d\n",mumps->id.INFOG(9))); 2269 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d\n",mumps->id.INFOG(10))); 2270 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d\n",mumps->id.INFOG(11))); 2271 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d\n",mumps->id.INFOG(12))); 2272 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d\n",mumps->id.INFOG(13))); 2273 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d\n",mumps->id.INFOG(14))); 2274 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d\n",mumps->id.INFOG(15))); 2275 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n",mumps->id.INFOG(16))); 2276 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n",mumps->id.INFOG(17))); 2277 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n",mumps->id.INFOG(18))); 2278 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n",mumps->id.INFOG(19))); 2279 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d\n",mumps->id.INFOG(20))); 2280 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n",mumps->id.INFOG(21))); 2281 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n",mumps->id.INFOG(22))); 2282 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n",mumps->id.INFOG(23))); 2283 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n",mumps->id.INFOG(24))); 2284 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n",mumps->id.INFOG(25))); 2285 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28))); 2286 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29))); 2287 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31))); 2288 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32))); 2289 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33))); 2290 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34))); 2291 
PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35))); 2292 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(36))); 2293 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n",mumps->id.INFOG(37))); 2294 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(38))); 2295 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n",mumps->id.INFOG(39))); 2296 } 2297 } 2298 } 2299 PetscFunctionReturn(0); 2300 } 2301 2302 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info) 2303 { 2304 Mat_MUMPS *mumps =(Mat_MUMPS*)A->data; 2305 2306 PetscFunctionBegin; 2307 info->block_size = 1.0; 2308 info->nz_allocated = mumps->id.INFOG(20); 2309 info->nz_used = mumps->id.INFOG(20); 2310 info->nz_unneeded = 0.0; 2311 info->assemblies = 0.0; 2312 info->mallocs = 0.0; 2313 info->memory = 0.0; 2314 info->fill_ratio_given = 0; 2315 info->fill_ratio_needed = 0; 2316 info->factor_mallocs = 0; 2317 PetscFunctionReturn(0); 2318 } 2319 2320 /* -------------------------------------------------------------------------------------------*/ 2321 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 2322 { 2323 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2324 const PetscScalar *arr; 2325 const PetscInt *idxs; 2326 PetscInt size,i; 2327 2328 PetscFunctionBegin; 2329 PetscCall(ISGetLocalSize(is,&size)); 2330 /* Schur complement matrix */ 2331 PetscCall(MatDestroy(&F->schur)); 2332 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur)); 2333 PetscCall(MatDenseGetArrayRead(F->schur,&arr)); 2334 mumps->id.schur = (MumpsScalar*)arr; 2335 mumps->id.size_schur = size; 2336 mumps->id.schur_lld = size; 2337 PetscCall(MatDenseRestoreArrayRead(F->schur,&arr)); 2338 if (mumps->sym == 1) { 2339 PetscCall(MatSetOption(F->schur,MAT_SPD,PETSC_TRUE)); 2340 } 2341 2342 /* MUMPS expects Fortran style indices */ 2343 PetscCall(PetscFree(mumps->id.listvar_schur)); 2344 PetscCall(PetscMalloc1(size,&mumps->id.listvar_schur)); 2345 PetscCall(ISGetIndices(is,&idxs)); 2346 for (i=0; i<size; i++) PetscCall(PetscMUMPSIntCast(idxs[i]+1,&(mumps->id.listvar_schur[i]))); 2347 PetscCall(ISRestoreIndices(is,&idxs)); 2348 /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 2349 mumps->id.ICNTL(26) = -1; 2350 PetscFunctionReturn(0); 2351 } 2352 2353 /* -------------------------------------------------------------------------------------------*/ 2354 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S) 2355 { 2356 Mat St; 2357 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2358 PetscScalar *array; 2359 #if defined(PETSC_USE_COMPLEX) 2360 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 2361 #endif 2362 2363 PetscFunctionBegin; 2364 PetscCheck(mumps->id.ICNTL(19),PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! 
You should call MatFactorSetSchurIS to enable it"); 2365 PetscCall(MatCreate(PETSC_COMM_SELF,&St)); 2366 PetscCall(MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur)); 2367 PetscCall(MatSetType(St,MATDENSE)); 2368 PetscCall(MatSetUp(St)); 2369 PetscCall(MatDenseGetArray(St,&array)); 2370 if (!mumps->sym) { /* MUMPS always return a full matrix */ 2371 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2372 PetscInt i,j,N=mumps->id.size_schur; 2373 for (i=0;i<N;i++) { 2374 for (j=0;j<N;j++) { 2375 #if !defined(PETSC_USE_COMPLEX) 2376 PetscScalar val = mumps->id.schur[i*N+j]; 2377 #else 2378 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2379 #endif 2380 array[j*N+i] = val; 2381 } 2382 } 2383 } else { /* stored by columns */ 2384 PetscCall(PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur)); 2385 } 2386 } else { /* either full or lower-triangular (not packed) */ 2387 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2388 PetscInt i,j,N=mumps->id.size_schur; 2389 for (i=0;i<N;i++) { 2390 for (j=i;j<N;j++) { 2391 #if !defined(PETSC_USE_COMPLEX) 2392 PetscScalar val = mumps->id.schur[i*N+j]; 2393 #else 2394 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2395 #endif 2396 array[i*N+j] = val; 2397 array[j*N+i] = val; 2398 } 2399 } 2400 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2401 PetscCall(PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur)); 2402 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2403 PetscInt i,j,N=mumps->id.size_schur; 2404 for (i=0;i<N;i++) { 2405 for (j=0;j<i+1;j++) { 2406 #if !defined(PETSC_USE_COMPLEX) 2407 PetscScalar val = mumps->id.schur[i*N+j]; 2408 #else 2409 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2410 #endif 2411 array[i*N+j] = val; 2412 array[j*N+i] = val; 2413 } 2414 } 2415 } 2416 } 2417 PetscCall(MatDenseRestoreArray(St,&array)); 2418 *S = St; 2419 PetscFunctionReturn(0); 2420 } 2421 2422 /* -------------------------------------------------------------------------------------------*/ 2423 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival) 2424 { 2425 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2426 2427 PetscFunctionBegin; 2428 if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called */ 2429 PetscInt i,nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */ 2430 for (i = 0; i < nICNTL_pre; ++i) if (mumps->ICNTL_pre[1+2*i] == icntl) break; /* is this ICNTL already cached? 
*/ 2431 if (i == nICNTL_pre) { /* not already cached */ 2432 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt)*(2*nICNTL_pre + 3),&mumps->ICNTL_pre)); 2433 else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt)*3,&mumps->ICNTL_pre)); 2434 mumps->ICNTL_pre[0]++; 2435 } 2436 mumps->ICNTL_pre[1+2*i] = icntl; 2437 PetscCall(PetscMUMPSIntCast(ival,mumps->ICNTL_pre+2+2*i)); 2438 } else PetscCall(PetscMUMPSIntCast(ival,&mumps->id.ICNTL(icntl))); 2439 PetscFunctionReturn(0); 2440 } 2441 2442 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival) 2443 { 2444 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2445 2446 PetscFunctionBegin; 2447 *ival = mumps->id.ICNTL(icntl); 2448 PetscFunctionReturn(0); 2449 } 2450 2451 /*@ 2452 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2453 2454 Logically Collective on Mat 2455 2456 Input Parameters: 2457 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2458 . icntl - index of MUMPS parameter array ICNTL() 2459 - ival - value of MUMPS ICNTL(icntl) 2460 2461 Options Database: 2462 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival 2463 2464 Level: beginner 2465 2466 References: 2467 . * - MUMPS Users' Guide 2468 2469 .seealso: `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2470 @*/ 2471 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival) 2472 { 2473 PetscFunctionBegin; 2474 PetscValidType(F,1); 2475 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2476 PetscValidLogicalCollectiveInt(F,icntl,2); 2477 PetscValidLogicalCollectiveInt(F,ival,3); 2478 PetscCheck(icntl >= 1 && icntl <= 38,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONG,"Unsupported ICNTL value %" PetscInt_FMT,icntl); 2479 PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival)); 2480 PetscFunctionReturn(0); 2481 } 2482 2483 /*@ 2484 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2485 2486 Logically Collective on Mat 2487 2488 Input Parameters: 2489 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2490 - icntl - index of MUMPS parameter array ICNTL() 2491 2492 Output Parameter: 2493 . ival - value of MUMPS ICNTL(icntl) 2494 2495 Level: beginner 2496 2497 References: 2498 . 
* - MUMPS Users' Guide 2499 2500 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2501 @*/ 2502 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival) 2503 { 2504 PetscFunctionBegin; 2505 PetscValidType(F,1); 2506 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2507 PetscValidLogicalCollectiveInt(F,icntl,2); 2508 PetscValidIntPointer(ival,3); 2509 PetscCheck(icntl >= 1 && icntl <= 38,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONG,"Unsupported ICNTL value %" PetscInt_FMT,icntl); 2510 PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival)); 2511 PetscFunctionReturn(0); 2512 } 2513 2514 /* -------------------------------------------------------------------------------------------*/ 2515 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val) 2516 { 2517 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2518 2519 PetscFunctionBegin; 2520 if (mumps->id.job == JOB_NULL) { 2521 PetscInt i,nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2522 for (i = 0; i < nCNTL_pre; ++i) if (mumps->CNTL_pre[1+2*i] == icntl) break; 2523 if (i == nCNTL_pre) { 2524 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal)*(2*nCNTL_pre + 3),&mumps->CNTL_pre)); 2525 else PetscCall(PetscCalloc(sizeof(PetscReal)*3,&mumps->CNTL_pre)); 2526 mumps->CNTL_pre[0]++; 2527 } 2528 mumps->CNTL_pre[1+2*i] = icntl; 2529 mumps->CNTL_pre[2+2*i] = val; 2530 } else mumps->id.CNTL(icntl) = val; 2531 PetscFunctionReturn(0); 2532 } 2533 2534 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val) 2535 { 2536 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2537 2538 PetscFunctionBegin; 2539 *val = mumps->id.CNTL(icntl); 2540 PetscFunctionReturn(0); 2541 } 2542 2543 /*@ 2544 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2545 2546 Logically Collective on Mat 2547 2548 Input Parameters: 2549 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2550 . icntl - index of MUMPS parameter array CNTL() 2551 - val - value of MUMPS CNTL(icntl) 2552 2553 Options Database: 2554 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival 2555 2556 Level: beginner 2557 2558 References: 2559 . * - MUMPS Users' Guide 2560 2561 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2562 @*/ 2563 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val) 2564 { 2565 PetscFunctionBegin; 2566 PetscValidType(F,1); 2567 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2568 PetscValidLogicalCollectiveInt(F,icntl,2); 2569 PetscValidLogicalCollectiveReal(F,val,3); 2570 PetscCheck(icntl >= 1 && icntl <= 7,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONG,"Unsupported CNTL value %" PetscInt_FMT,icntl); 2571 PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val)); 2572 PetscFunctionReturn(0); 2573 } 2574 2575 /*@ 2576 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2577 2578 Logically Collective on Mat 2579 2580 Input Parameters: 2581 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2582 - icntl - index of MUMPS parameter array CNTL() 2583 2584 Output Parameter: 2585 . 
val - value of MUMPS CNTL(icntl) 2586 2587 Level: beginner 2588 2589 References: 2590 . * - MUMPS Users' Guide 2591 2592 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2593 @*/ 2594 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val) 2595 { 2596 PetscFunctionBegin; 2597 PetscValidType(F,1); 2598 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2599 PetscValidLogicalCollectiveInt(F,icntl,2); 2600 PetscValidRealPointer(val,3); 2601 PetscCheck(icntl >= 1 && icntl <= 7,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONG,"Unsupported CNTL value %" PetscInt_FMT,icntl); 2602 PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val)); 2603 PetscFunctionReturn(0); 2604 } 2605 2606 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info) 2607 { 2608 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2609 2610 PetscFunctionBegin; 2611 *info = mumps->id.INFO(icntl); 2612 PetscFunctionReturn(0); 2613 } 2614 2615 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog) 2616 { 2617 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2618 2619 PetscFunctionBegin; 2620 *infog = mumps->id.INFOG(icntl); 2621 PetscFunctionReturn(0); 2622 } 2623 2624 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo) 2625 { 2626 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2627 2628 PetscFunctionBegin; 2629 *rinfo = mumps->id.RINFO(icntl); 2630 PetscFunctionReturn(0); 2631 } 2632 2633 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog) 2634 { 2635 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2636 2637 PetscFunctionBegin; 2638 *rinfog = mumps->id.RINFOG(icntl); 2639 PetscFunctionReturn(0); 2640 } 2641 2642 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS) 2643 { 2644 Mat Bt = NULL,Btseq = NULL; 2645 PetscBool flg; 2646 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2647 PetscScalar *aa; 2648 PetscInt spnr,*ia,*ja,M,nrhs; 2649 2650 PetscFunctionBegin; 2651 PetscValidPointer(spRHS,2); 2652 PetscCall(PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg)); 2653 if (flg) { 2654 PetscCall(MatTransposeGetMat(spRHS,&Bt)); 2655 } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be type MATTRANSPOSEMAT matrix"); 2656 2657 PetscCall(MatMumpsSetIcntl(F,30,1)); 2658 2659 if (mumps->petsc_size > 1) { 2660 Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data; 2661 Btseq = b->A; 2662 } else { 2663 Btseq = Bt; 2664 } 2665 2666 PetscCall(MatGetSize(spRHS,&M,&nrhs)); 2667 mumps->id.nrhs = nrhs; 2668 mumps->id.lrhs = M; 2669 mumps->id.rhs = NULL; 2670 2671 if (!mumps->myid) { 2672 PetscCall(MatSeqAIJGetArray(Btseq,&aa)); 2673 PetscCall(MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg)); 2674 PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2675 PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs)); 2676 mumps->id.rhs_sparse = (MumpsScalar*)aa; 2677 } else { 2678 mumps->id.irhs_ptr = NULL; 2679 mumps->id.irhs_sparse = NULL; 2680 mumps->id.nz_rhs = 0; 2681 mumps->id.rhs_sparse = NULL; 2682 } 2683 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2684 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2685 2686 /* solve phase */ 2687 /*-------------*/ 2688 mumps->id.job = JOB_SOLVE; 2689 
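/* Descriptive note (added comment, not from the original source): with ICNTL(30) = 1 set above, this solve computes only the
   user-requested entries of inv(A). MUMPS reads the requested sparsity pattern from irhs_ptr/irhs_sparse on the host rank and
   returns the corresponding inverse entries in rhs_sparse, i.e. in the value array of Btseq, which is restored to spRHS below. */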
PetscMUMPS_c(mumps); 2690 if (mumps->id.INFOG(1) < 0) 2691 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d",mumps->id.INFOG(1),mumps->id.INFO(2)); 2692 2693 if (!mumps->myid) { 2694 PetscCall(MatSeqAIJRestoreArray(Btseq,&aa)); 2695 PetscCall(MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg)); 2696 PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2697 } 2698 PetscFunctionReturn(0); 2699 } 2700 2701 /*@ 2702 MatMumpsGetInverse - Get user-specified set of entries in inverse of A 2703 2704 Logically Collective on Mat 2705 2706 Input Parameters: 2707 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2708 - spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0] 2709 2710 Output Parameter: 2711 . spRHS - requested entries of inverse of A 2712 2713 Level: beginner 2714 2715 References: 2716 . * - MUMPS Users' Guide 2717 2718 .seealso: `MatGetFactor()`, `MatCreateTranspose()` 2719 @*/ 2720 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS) 2721 { 2722 PetscFunctionBegin; 2723 PetscValidType(F,1); 2724 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2725 PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS)); 2726 PetscFunctionReturn(0); 2727 } 2728 2729 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST) 2730 { 2731 Mat spRHS; 2732 2733 PetscFunctionBegin; 2734 PetscCall(MatCreateTranspose(spRHST,&spRHS)); 2735 PetscCall(MatMumpsGetInverse_MUMPS(F,spRHS)); 2736 PetscCall(MatDestroy(&spRHS)); 2737 PetscFunctionReturn(0); 2738 } 2739 2740 /*@ 2741 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T 2742 2743 Logically Collective on Mat 2744 2745 Input Parameters: 2746 + F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface 2747 - spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0] 2748 2749 Output Parameter: 2750 . spRHST - requested entries of inverse of A^T 2751 2752 Level: beginner 2753 2754 References: 2755 . * - MUMPS Users' Guide 2756 2757 .seealso: `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 2758 @*/ 2759 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST) 2760 { 2761 PetscBool flg; 2762 2763 PetscFunctionBegin; 2764 PetscValidType(F,1); 2765 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2766 PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL)); 2767 PetscCheck(flg,PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix"); 2768 2769 PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST)); 2770 PetscFunctionReturn(0); 2771 } 2772 2773 /*@ 2774 MatMumpsGetInfo - Get MUMPS parameter INFO() 2775 2776 Logically Collective on Mat 2777 2778 Input Parameters: 2779 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2780 - icntl - index of MUMPS parameter array INFO() 2781 2782 Output Parameter: 2783 . ival - value of MUMPS INFO(icntl) 2784 2785 Level: beginner 2786 2787 References: 2788 . 
* - MUMPS Users' Guide 2789 2790 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2791 @*/ 2792 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival) 2793 { 2794 PetscFunctionBegin; 2795 PetscValidType(F,1); 2796 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2797 PetscValidIntPointer(ival,3); 2798 PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival)); 2799 PetscFunctionReturn(0); 2800 } 2801 2802 /*@ 2803 MatMumpsGetInfog - Get MUMPS parameter INFOG() 2804 2805 Logically Collective on Mat 2806 2807 Input Parameters: 2808 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2809 - icntl - index of MUMPS parameter array INFOG() 2810 2811 Output Parameter: 2812 . ival - value of MUMPS INFOG(icntl) 2813 2814 Level: beginner 2815 2816 References: 2817 . * - MUMPS Users' Guide 2818 2819 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2820 @*/ 2821 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival) 2822 { 2823 PetscFunctionBegin; 2824 PetscValidType(F,1); 2825 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2826 PetscValidIntPointer(ival,3); 2827 PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival)); 2828 PetscFunctionReturn(0); 2829 } 2830 2831 /*@ 2832 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 2833 2834 Logically Collective on Mat 2835 2836 Input Parameters: 2837 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2838 - icntl - index of MUMPS parameter array RINFO() 2839 2840 Output Parameter: 2841 . val - value of MUMPS RINFO(icntl) 2842 2843 Level: beginner 2844 2845 References: 2846 . * - MUMPS Users' Guide 2847 2848 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()` 2849 @*/ 2850 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val) 2851 { 2852 PetscFunctionBegin; 2853 PetscValidType(F,1); 2854 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2855 PetscValidRealPointer(val,3); 2856 PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val)); 2857 PetscFunctionReturn(0); 2858 } 2859 2860 /*@ 2861 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 2862 2863 Logically Collective on Mat 2864 2865 Input Parameters: 2866 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2867 - icntl - index of MUMPS parameter array RINFOG() 2868 2869 Output Parameter: 2870 . val - value of MUMPS RINFOG(icntl) 2871 2872 Level: beginner 2873 2874 References: 2875 . 
* - MUMPS Users' Guide
2876
2877 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
2878 @*/
2879 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val)
2880 {
2881 PetscFunctionBegin;
2882 PetscValidType(F,1);
2883 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2884 PetscValidRealPointer(val,3);
2885 PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));
2886 PetscFunctionReturn(0);
2887 }
2888
2889 /*MC
2890 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for
2891 distributed and sequential matrices via the external package MUMPS.
2892
2893 Works with MATAIJ and MATSBAIJ matrices
2894
2895 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
2896
2897 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below.
2898
2899 Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver
2900
2901 Options Database Keys:
2902 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
2903 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warnings
2904 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host
2905 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4)
2906 . -mat_mumps_icntl_6 - ICNTL(6): permutes the matrix to a zero-free diagonal and/or scales it (0 to 7)
2907 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
2908 Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
2909 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77)
2910 . -mat_mumps_icntl_10 - ICNTL(10): maximum number of iterative refinement steps
2911 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view)
2912 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
2913 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
2914 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space
2915 . -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format
2916 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement
2917 . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
2918 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
2919 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor
2920 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
2921 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis
2922 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed
2923 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
2924 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
2925 .
-mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
2926 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
2927 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant
2928 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
2929 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
2930 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
2931 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold
2932 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion for iterative refinement
2933 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
2934 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
2935 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
2936 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
2937 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
2938 The default is the number of cores per CPU package (socket) as reported by hwloc, as suggested by the MUMPS manual.
2939
2940 Level: beginner
2941
2942 Notes:
2943 MUMPS Cholesky does not handle (complex) Hermitian matrices (see the MUMPS Users' Guide, http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf), so using it on a Hermitian matrix will generate an error.
2944
2945 When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
2946 `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.
2947
2948 When a MUMPS factorization fails inside a KSP solve, for example with KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling
2949 $ KSPGetPC(ksp,&pc);
2950 $ PCFactorGetMatrix(pc,&mat);
2951 $ MatMumpsGetInfo(mat,....);
2952 $ MatMumpsGetInfog(mat,....); etc.
2953 Alternatively, one can run with -ksp_error_if_not_converged and the program will stop and print the information in the error message.
2954
2955 Using MUMPS with 64-bit integers
2956 MUMPS provides 64-bit integer support in two build modes:
2957 full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
2958 requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).
2959
2960 selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
2961 MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
2962 columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
2963 integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.
2964
2965 With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.
2966
2967 Two modes to run MUMPS/PETSc with OpenMP
2968 $ Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
2969 $ threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
2970
2971 $ Pass -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
2972 $ if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
2973
2974 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
2975 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc
2976 (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
2977 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
2978 (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).
2979
2980 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
2981 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
2982 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
2983 are called slave ranks (or slaves). Only master ranks are seen by MUMPS; slaves are not. We free the CPUs assigned to slaves (which might be set
2984 by CPU binding policies in job scripts) and make them available to the master so that OpenMP threads spawned by MUMPS can run on them.
2985 On a multi-socket compute node, MPI rank mapping is an issue. Continuing the above example, suppose your compute node has two sockets:
2986 if you interleave MPI ranks on the two sockets (even ranks on socket 0, odd ranks on socket 1) and bind
2987 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16 a master rank (and the threads it spawns) will use half of the cores in socket 0 and half
2988 of the cores in socket 1, which hurts locality. On the other hand, if you map MPI ranks consecutively onto the two sockets, the
2989 problem does not occur. Therefore, when you use -mat_mumps_use_omp_threads, keep an eye on your MPI rank mapping and CPU binding.
2990 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
2991 examine the mapping result.
2992
2993 PETSc does not control thread binding in MUMPS, so to get the best performance one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts,
2994 for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc
2995 calls omp_set_num_threads(m) internally before calling MUMPS.
2996
2997 References:
2998 + * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
2999 - * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
3000 3001 .seealso: `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()` 3002 3003 M*/ 3004 3005 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type) 3006 { 3007 PetscFunctionBegin; 3008 *type = MATSOLVERMUMPS; 3009 PetscFunctionReturn(0); 3010 } 3011 3012 /* MatGetFactor for Seq and MPI AIJ matrices */ 3013 static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F) 3014 { 3015 Mat B; 3016 Mat_MUMPS *mumps; 3017 PetscBool isSeqAIJ; 3018 PetscMPIInt size; 3019 3020 PetscFunctionBegin; 3021 #if defined(PETSC_USE_COMPLEX) 3022 PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || ftype != MAT_FACTOR_CHOLESKY,PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported"); 3023 #endif 3024 /* Create the factorization matrix */ 3025 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ)); 3026 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 3027 PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 3028 PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name)); 3029 PetscCall(MatSetUp(B)); 3030 3031 PetscCall(PetscNewLog(B,&mumps)); 3032 3033 B->ops->view = MatView_MUMPS; 3034 B->ops->getinfo = MatGetInfo_MUMPS; 3035 3036 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps)); 3037 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS)); 3038 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS)); 3039 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS)); 3040 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS)); 3041 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS)); 3042 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS)); 3043 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS)); 3044 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS)); 3045 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS)); 3046 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS)); 3047 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS)); 3048 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS)); 3049 3050 if (ftype == MAT_FACTOR_LU) { 3051 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3052 B->factortype = MAT_FACTOR_LU; 3053 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3054 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 3055 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU])); 3056 mumps->sym = 0; 3057 } else { 3058 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3059 B->factortype = MAT_FACTOR_CHOLESKY; 3060 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3061 else mumps->ConvertToTriples = 
MatConvertToTriples_mpiaij_mpisbaij; 3062 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3063 #if defined(PETSC_USE_COMPLEX) 3064 mumps->sym = 2; 3065 #else 3066 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3067 else mumps->sym = 2; 3068 #endif 3069 } 3070 3071 /* set solvertype */ 3072 PetscCall(PetscFree(B->solvertype)); 3073 PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype)); 3074 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 3075 if (size == 1) { 3076 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3077 B->canuseordering = PETSC_TRUE; 3078 } 3079 B->ops->destroy = MatDestroy_MUMPS; 3080 B->data = (void*)mumps; 3081 3082 *F = B; 3083 mumps->id.job = JOB_NULL; 3084 mumps->ICNTL_pre = NULL; 3085 mumps->CNTL_pre = NULL; 3086 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3087 PetscFunctionReturn(0); 3088 } 3089 3090 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3091 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F) 3092 { 3093 Mat B; 3094 Mat_MUMPS *mumps; 3095 PetscBool isSeqSBAIJ; 3096 PetscMPIInt size; 3097 3098 PetscFunctionBegin; 3099 #if defined(PETSC_USE_COMPLEX) 3100 PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE,PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported"); 3101 #endif 3102 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 3103 PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 3104 PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name)); 3105 PetscCall(MatSetUp(B)); 3106 3107 PetscCall(PetscNewLog(B,&mumps)); 3108 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ)); 3109 if (isSeqSBAIJ) { 3110 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3111 } else { 3112 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3113 } 3114 3115 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3116 B->ops->view = MatView_MUMPS; 3117 B->ops->getinfo = MatGetInfo_MUMPS; 3118 3119 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps)); 3120 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS)); 3121 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS)); 3122 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS)); 3123 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS)); 3124 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS)); 3125 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS)); 3126 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS)); 3127 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS)); 3128 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS)); 3129 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS)); 3130 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS)); 3131 
PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS)); 3132 3133 B->factortype = MAT_FACTOR_CHOLESKY; 3134 #if defined(PETSC_USE_COMPLEX) 3135 mumps->sym = 2; 3136 #else 3137 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 3138 else mumps->sym = 2; 3139 #endif 3140 3141 /* set solvertype */ 3142 PetscCall(PetscFree(B->solvertype)); 3143 PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype)); 3144 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 3145 if (size == 1) { 3146 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3147 B->canuseordering = PETSC_TRUE; 3148 } 3149 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3150 B->ops->destroy = MatDestroy_MUMPS; 3151 B->data = (void*)mumps; 3152 3153 *F = B; 3154 mumps->id.job = JOB_NULL; 3155 mumps->ICNTL_pre = NULL; 3156 mumps->CNTL_pre = NULL; 3157 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3158 PetscFunctionReturn(0); 3159 } 3160 3161 static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F) 3162 { 3163 Mat B; 3164 Mat_MUMPS *mumps; 3165 PetscBool isSeqBAIJ; 3166 PetscMPIInt size; 3167 3168 PetscFunctionBegin; 3169 /* Create the factorization matrix */ 3170 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ)); 3171 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 3172 PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 3173 PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name)); 3174 PetscCall(MatSetUp(B)); 3175 3176 PetscCall(PetscNewLog(B,&mumps)); 3177 if (ftype == MAT_FACTOR_LU) { 3178 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3179 B->factortype = MAT_FACTOR_LU; 3180 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3181 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3182 mumps->sym = 0; 3183 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU])); 3184 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 3185 3186 B->ops->view = MatView_MUMPS; 3187 B->ops->getinfo = MatGetInfo_MUMPS; 3188 3189 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps)); 3190 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS)); 3191 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS)); 3192 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS)); 3193 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS)); 3194 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS)); 3195 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS)); 3196 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS)); 3197 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS)); 3198 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS)); 3199 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS)); 3200 
PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS)); 3201 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS)); 3202 3203 /* set solvertype */ 3204 PetscCall(PetscFree(B->solvertype)); 3205 PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype)); 3206 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 3207 if (size == 1) { 3208 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3209 B->canuseordering = PETSC_TRUE; 3210 } 3211 B->ops->destroy = MatDestroy_MUMPS; 3212 B->data = (void*)mumps; 3213 3214 *F = B; 3215 mumps->id.job = JOB_NULL; 3216 mumps->ICNTL_pre = NULL; 3217 mumps->CNTL_pre = NULL; 3218 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3219 PetscFunctionReturn(0); 3220 } 3221 3222 /* MatGetFactor for Seq and MPI SELL matrices */ 3223 static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F) 3224 { 3225 Mat B; 3226 Mat_MUMPS *mumps; 3227 PetscBool isSeqSELL; 3228 PetscMPIInt size; 3229 3230 PetscFunctionBegin; 3231 /* Create the factorization matrix */ 3232 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL)); 3233 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 3234 PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 3235 PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name)); 3236 PetscCall(MatSetUp(B)); 3237 3238 PetscCall(PetscNewLog(B,&mumps)); 3239 3240 B->ops->view = MatView_MUMPS; 3241 B->ops->getinfo = MatGetInfo_MUMPS; 3242 3243 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps)); 3244 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS)); 3245 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS)); 3246 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS)); 3247 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS)); 3248 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS)); 3249 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS)); 3250 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS)); 3251 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS)); 3252 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS)); 3253 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS)); 3254 3255 if (ftype == MAT_FACTOR_LU) { 3256 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3257 B->factortype = MAT_FACTOR_LU; 3258 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 3259 else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 3260 mumps->sym = 0; 3261 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU])); 3262 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 3263 3264 /* set solvertype */ 3265 PetscCall(PetscFree(B->solvertype)); 3266 PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype)); 3267 
PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 3268 if (size == 1) { 3269 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3270 B->canuseordering = PETSC_TRUE; 3271 } 3272 B->ops->destroy = MatDestroy_MUMPS; 3273 B->data = (void*)mumps; 3274 3275 *F = B; 3276 mumps->id.job = JOB_NULL; 3277 mumps->ICNTL_pre = NULL; 3278 mumps->CNTL_pre = NULL; 3279 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3280 PetscFunctionReturn(0); 3281 } 3282 3283 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 3284 { 3285 PetscFunctionBegin; 3286 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps)); 3287 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps)); 3288 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps)); 3289 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps)); 3290 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps)); 3291 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps)); 3292 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps)); 3293 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps)); 3294 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps)); 3295 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps)); 3296 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps)); 3297 PetscFunctionReturn(0); 3298 } 3299
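/*
   A minimal usage sketch (added for illustration, not part of the library source) of how the factor objects registered above
   are typically driven from application code when calling MUMPS directly rather than through KSP/PC. It assumes an already
   assembled parallel AIJ matrix A and compatible vectors b and x exist on PETSC_COMM_WORLD; the ICNTL/CNTL values shown are
   illustrative only (see the MUMPS Users' Guide for their meaning).

     Mat       F;
     PetscInt  infog1;
     PetscReal rinfog3;

     PetscCall(MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F));
     PetscCall(MatMumpsSetIcntl(F,7,2));                   // ICNTL(7)=2: AMF ordering for the sequential analysis
     PetscCall(MatMumpsSetCntl(F,1,0.01));                 // CNTL(1): relative pivoting threshold
     PetscCall(MatLUFactorSymbolic(F,A,NULL,NULL,NULL));
     PetscCall(MatLUFactorNumeric(F,A,NULL));
     PetscCall(MatMumpsGetInfog(F,1,&infog1));             // INFOG(1) < 0 indicates a MUMPS error
     PetscCall(MatMumpsGetRinfog(F,3,&rinfog3));           // e.g. RINFOG(3), flops of the factorization
     PetscCall(MatSolve(F,b,x));
     PetscCall(MatDestroy(&F));

   Within a KSP solve the same controls are reachable from the options database, e.g.
     -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_7 2 -mat_mumps_cntl_1 0.01
*/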