/*
    Provides an interface to the MUMPS sparse solver
*/
#include <petscpkg_version.h>
#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sell/mpi/mpisell.h>

EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#include <cmumps_c.h>
#else
#include <zmumps_c.h>
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#include <smumps_c.h>
#else
#include <dmumps_c.h>
#endif
#endif
EXTERN_C_END
#define JOB_INIT         -1
#define JOB_FACTSYMBOLIC 1
#define JOB_FACTNUMERIC  2
#define JOB_SOLVE        3
#define JOB_END          -2

/* calls to MUMPS */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c cmumps_c
#else
#define MUMPS_c zmumps_c
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c smumps_c
#else
#define MUMPS_c dmumps_c
#endif
#endif

/* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
   numbers of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
   naming convention of PetscMPIInt, PetscBLASInt etc.
*/
typedef MUMPS_INT PetscMUMPSInt;

#if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
#if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
#error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
#endif
#else
#if defined(INTSIZE64) /* INTSIZE64 is a command-line macro one used to build MUMPS in full 64-bit mode */
#error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
#endif
#endif

#define MPIU_MUMPSINT       MPI_INT
#define PETSC_MUMPS_INT_MAX 2147483647
#define PETSC_MUMPS_INT_MIN -2147483648

/* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers */
static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a,PetscMUMPSInt *b)
{
  PetscFunctionBegin;
#if PetscDefined(USE_64BIT_INDICES)
  PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
#endif
  *b = (PetscMUMPSInt)(a);
  PetscFunctionReturn(0);
}

/* Put these utility routines here since they are only used in this file */
static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject,const char opt[],const char text[],const char man[],PetscMUMPSInt currentvalue,PetscMUMPSInt *value,PetscBool *set,PetscMUMPSInt lb,PetscMUMPSInt ub)
{
  PetscInt  myval;
  PetscBool myset;
  PetscFunctionBegin;
  /* PetscInt's size should always be >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
  PetscCall(PetscOptionsInt_Private(PetscOptionsObject,opt,text,man,(PetscInt)currentvalue,&myval,&myset,lb,ub));
  if (myset) PetscCall(PetscMUMPSIntCast(myval,value));
  if (set) *set = myset;
  PetscFunctionReturn(0);
}
#define PetscOptionsMUMPSInt(a,b,c,d,e,f) PetscOptionsMUMPSInt_Private(PetscOptionsObject,a,b,c,d,e,f,PETSC_MUMPS_INT_MIN,PETSC_MUMPS_INT_MAX)
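/* Illustrative sketch only (not code used by this file): a typical use of the cast helper,
   converting a PetscInt dimension before storing it in the MUMPS structure.

     PetscMUMPSInt n;
     PetscCall(PetscMUMPSIntCast(A->rmap->n,&n));  // errors with PETSC_ERR_ARG_OUTOFRANGE on overflow
     mumps->id.n = n;

   With 32-bit PetscInt the range check compiles away and only the assignment remains. */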
/* If using PETSc OpenMP support, we only call MUMPS on master ranks.
   Before/after the call, we change/restore the CPUs the master ranks can run on */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
#define PetscMUMPS_c(mumps) \
  do { \
    if (mumps->use_petsc_omp_support) { \
      if (mumps->is_omp_master) { \
        PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
        MUMPS_c(&mumps->id); \
        PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
      } \
      PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
      /* Global info is the same on all processes, so we Bcast it within omp_comm. Local info is specific \
         to each process, so we only Bcast info[1], an error code, and leave the others (since they do not have \
         an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \
         omp_comm is a small shared-memory communicator, hence doing multiple Bcasts as shown below is OK. \
      */ \
      PetscCallMPI(MPI_Bcast(mumps->id.infog, 40,MPIU_MUMPSINT, 0,mumps->omp_comm));\
      PetscCallMPI(MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,     0,mumps->omp_comm));\
      PetscCallMPI(MPI_Bcast(mumps->id.info,  1, MPIU_MUMPSINT, 0,mumps->omp_comm));\
    } else { \
      MUMPS_c(&mumps->id); \
    } \
  } while (0)
#else
#define PetscMUMPS_c(mumps) \
  do { MUMPS_c(&mumps->id); } while (0)
#endif

/* declare MumpsScalar */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MumpsScalar mumps_complex
#else
#define MumpsScalar mumps_double_complex
#endif
#else
#define MumpsScalar PetscScalar
#endif

/* macros s.t. indices match MUMPS documentation */
#define ICNTL(I)  icntl[(I)-1]
#define CNTL(I)   cntl[(I)-1]
#define INFOG(I)  infog[(I)-1]
#define INFO(I)   info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I)  rinfo[(I)-1]
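/* Example (illustrative only): with the macros above, the 1-based parameter numbers from the MUMPS
   manual can be used directly, e.g.

     mumps->id.ICNTL(7) = 2;   // writes the 0-based C array entry id.icntl[6]

   so the code below reads like the MUMPS documentation. */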
typedef struct Mat_MUMPS Mat_MUMPS;
struct Mat_MUMPS {
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;
#else
  ZMUMPS_STRUC_C id;
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
#else
  DMUMPS_STRUC_C id;
#endif
#endif

  MatStructure   matstruc;
  PetscMPIInt    myid,petsc_size;
  PetscMUMPSInt  *irn,*jcn;           /* the (i,j,v) triplets passed to mumps */
  PetscScalar    *val,*val_alloc;     /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
  PetscInt64     nnz;                 /* number of nonzeros. The type is called selective 64-bit in mumps */
  PetscMUMPSInt  sym;
  MPI_Comm       mumps_comm;
  PetscMUMPSInt  ICNTL9_pre;          /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter     scat_rhs,scat_sol;   /* used by MatSolve() */
  PetscMUMPSInt  ICNTL20;             /* use centralized (0) or distributed (10) dense RHS */
  PetscMUMPSInt  lrhs_loc,nloc_rhs,*irhs_loc;
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  PetscInt       *rhs_nrow,max_nrhs;
  PetscMPIInt    *rhs_recvcounts,*rhs_disps;
  PetscScalar    *rhs_loc,*rhs_recvbuf;
#endif
  Vec            b_seq,x_seq;
  PetscInt       ninfo,*info;         /* which INFO to display */
  PetscInt       sizeredrhs;
  PetscScalar    *schur_sol;
  PetscInt       schur_sizesol;
  PetscMUMPSInt  *ia_alloc,*ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
  PetscInt64     cur_ilen,cur_jlen;   /* current lengths of ia_alloc[], ja_alloc[] */
  PetscErrorCode (*ConvertToTriples)(Mat,PetscInt,MatReuse,Mat_MUMPS*);

  /* stuff used by petsc/mumps OpenMP support */
  PetscBool      use_petsc_omp_support;
  PetscOmpCtrl   omp_ctrl;            /* an OpenMP controller with which blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */
  MPI_Comm       petsc_comm,omp_comm; /* petsc_comm is the petsc matrix's comm */
  PetscInt64     *recvcount;          /* a collection of nnz on omp_master */
  PetscMPIInt    tag,omp_comm_size;
  PetscBool      is_omp_master;       /* is this rank the master of omp_comm? */
  MPI_Request    *reqs;
};

/* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
   Here, nrow is the number of rows, ia[] is the row pointer and ja[] is the column indices.
 */
static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps,PetscInt nrow,PetscInt *ia,PetscInt *ja,PetscMUMPSInt **ia_mumps,PetscMUMPSInt **ja_mumps,PetscMUMPSInt *nnz_mumps)
{
  PetscInt nnz = ia[nrow]-1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */

  PetscFunctionBegin;
#if defined(PETSC_USE_64BIT_INDICES)
  {
    PetscInt i;
    if (nrow+1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
      PetscCall(PetscFree(mumps->ia_alloc));
      PetscCall(PetscMalloc1(nrow+1,&mumps->ia_alloc));
      mumps->cur_ilen = nrow+1;
    }
    if (nnz > mumps->cur_jlen) {
      PetscCall(PetscFree(mumps->ja_alloc));
      PetscCall(PetscMalloc1(nnz,&mumps->ja_alloc));
      mumps->cur_jlen = nnz;
    }
    for (i=0; i<nrow+1; i++) PetscCall(PetscMUMPSIntCast(ia[i],&(mumps->ia_alloc[i])));
    for (i=0; i<nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i],&(mumps->ja_alloc[i])));
    *ia_mumps = mumps->ia_alloc;
    *ja_mumps = mumps->ja_alloc;
  }
#else
  *ia_mumps = ia;
  *ja_mumps = ja;
#endif
  PetscCall(PetscMUMPSIntCast(nnz,nnz_mumps));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps)
{
  PetscFunctionBegin;
  PetscCall(PetscFree(mumps->id.listvar_schur));
  PetscCall(PetscFree(mumps->id.redrhs));
  PetscCall(PetscFree(mumps->schur_sol));
  mumps->id.size_schur = 0;
  mumps->id.schur_lld  = 0;
  mumps->id.ICNTL(19)  = 0;
  PetscFunctionReturn(0);
}

/* solve with rhs in mumps->id.redrhs and return in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS            *mumps=(Mat_MUMPS*)F->data;
  Mat                  S,B,X;
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;

  PetscFunctionBegin;
  PetscCall(MatFactorFactorizeSchurComplement(F));
  PetscCall(MatFactorGetSchurComplement(F,&S,&schurstatus));
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B));
  PetscCall(MatSetType(B,((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  PetscCall(MatBindToCPU(B,S->boundtocpu));
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X));
    PetscCall(MatSetType(X,((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X,S->boundtocpu));
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatMatSolveTranspose(S,B,X));
    } else {
      PetscCall(MatMatSolve(S,B,X));
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    sizesol = mumps->id.nrhs*mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      PetscCall(PetscFree(mumps->schur_sol));
      PetscCall(PetscMalloc1(sizesol,&mumps->schur_sol));
      mumps->schur_sizesol = sizesol;
    }
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X));
    PetscCall(MatSetType(X,((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X,S->boundtocpu));
#endif
    PetscCall(MatProductCreateWithMat(S,B,NULL,X));
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatProductSetType(X,MATPRODUCT_AtB));
    } else {
      PetscCall(MatProductSetType(X,MATPRODUCT_AB));
    }
    PetscCall(MatProductSetFromOptions(X));
    PetscCall(MatProductSymbolic(X));
    PetscCall(MatProductNumeric(X));

    PetscCall(MatCopy(X,B,SAME_NONZERO_PATTERN));
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %d",F->schur_status);
  }
  PetscCall(MatFactorRestoreSchurComplement(F,&S,schurstatus));
  PetscCall(MatDestroy(&B));
  PetscCall(MatDestroy(&X));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS *mumps=(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
    PetscFunctionReturn(0);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      PetscCall(PetscFree(mumps->id.redrhs));
      mumps->id.lredrhs = mumps->id.size_schur;
      PetscCall(PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs));
      mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs;
    }
    mumps->id.ICNTL(26) = 1; /* condensation phase */
  } else { /* prepare for the expansion step */
    /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
    PetscCall(MatMumpsSolveSchur_Private(F));
    mumps->id.ICNTL(26) = 2; /* expansion phase */
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free MUMPS internal array for redrhs if we have solved for multiple rhs, in order to save memory space */
    if (mumps->id.nrhs > 1) {
      PetscCall(PetscFree(mumps->id.redrhs));
      mumps->id.lredrhs = 0;
      mumps->sizeredrhs = 0;
    }
  }
  PetscFunctionReturn(0);
}
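/* For context (an illustrative sketch, not code used here): the condensation/expansion pair above is
   driven by a user-level sequence roughly like

     MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);
     MatFactorSetSchurIS(F,is_schur);           // rows/cols kept in the Schur complement
     MatLUFactorSymbolic(F,A,NULL,NULL,&info);
     MatLUFactorNumeric(F,A,&info);
     MatSolve(F,b,x);                           // internally: condense, solve the Schur system, expand

   so the solve routines below call MatMumpsHandleSchur_Private() once before and once after JOB_SOLVE. */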
/*
  MatConvertToTriples_A_B - convert a PETSc matrix to triples: row[nz], col[nz], val[nz]

  input:
    A       - matrix in aij, baij or sbaij format
    shift   - 0: C style output triples; 1: Fortran style output triples.
    reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triples
              MAT_REUSE_MATRIX:   only the values in the v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and col indices, matrix values (matrix triples)

  The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() they are
  freed with PetscFree(mumps->irn). This is not ideal code; the fact that v is only sometimes part of mumps->irn means
  that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
 */

PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscScalar *av;
  const PetscInt    *ai,*aj,*ajj,M=A->rmap->n;
  PetscInt64        nz,rnz,i,j,k;
  PetscMUMPSInt     *row,*col;
  Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArrayRead(A,&av));
  mumps->val = (PetscScalar*)av;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = aa->nz;
    ai = aa->i;
    aj = aa->j;
    PetscCall(PetscMalloc2(nz,&row,nz,&col));
    for (i=k=0; i<M; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        PetscCall(PetscMUMPSIntCast(i+shift,&row[k]));
        PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[k]));
        k++;
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(A,&av));
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  PetscInt64    nz,i,j,k,r;
  Mat_SeqSELL   *a=(Mat_SeqSELL*)A->data;
  PetscMUMPSInt *row,*col;

  PetscFunctionBegin;
  mumps->val = a->val;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = a->sliidx[a->totalslices];
    PetscCall(PetscMalloc2(nz,&row,nz,&col));
    for (i=k=0; i<a->totalslices; i++) {
      for (j=a->sliidx[i],r=0; j<a->sliidx[i+1]; j++,r=((r+1)&0x07)) {
        PetscCall(PetscMUMPSIntCast(8*i+r+shift,&row[k++]));
      }
    }
    for (i=0; i<nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i]+shift,&col[i]));
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  Mat_SeqBAIJ    *aa=(Mat_SeqBAIJ*)A->data;
  const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2;
  PetscInt64     M,nz,idx=0,rnz,i,j,k,m;
  PetscInt       bs;
  PetscMUMPSInt  *row,*col;

  PetscFunctionBegin;
  PetscCall(MatGetBlockSize(A,&bs));
  M = A->rmap->N/bs;
  mumps->val = aa->a;
  if (reuse == MAT_INITIAL_MATRIX) {
    ai = aa->i; aj = aa->j;
    nz = bs2*aa->nz;
    PetscCall(PetscMalloc2(nz,&row,nz,&col));
    for (i=0; i<M; i++) {
      ajj = aj + ai[i];
      rnz = ai[i+1] - ai[i];
      for (k=0; k<rnz; k++) {
        for (j=0; j<bs; j++) {
          for (m=0; m<bs; m++) {
            PetscCall(PetscMUMPSIntCast(i*bs + m + shift,&row[idx]));
            PetscCall(PetscMUMPSIntCast(bs*ajj[k] + j + shift,&col[idx]));
            idx++;
          }
        }
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscFunctionReturn(0);
}
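/* Illustrative example (not used by the code): for the 2x2 sequential AIJ matrix
     [ 1 2 ]
     [ 0 3 ]
   the converters above with shift = 1 (Fortran style, what MUMPS expects) produce
     irn = {1, 1, 2},  jcn = {1, 2, 2},  val = {1., 2., 3.}
   i.e. one (row, col, value) triple per stored nonzero. */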
PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt *ai,*aj,*ajj;
  PetscInt       bs;
  PetscInt64     nz,rnz,i,j,k,m;
  PetscMUMPSInt  *row,*col;
  PetscScalar    *val;
  Mat_SeqSBAIJ   *aa=(Mat_SeqSBAIJ*)A->data;
  const PetscInt bs2=aa->bs2,mbs=aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool      hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatGetOption(A,MAT_HERMITIAN,&hermitian));
  PetscCheck(!hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  ai = aa->i;
  aj = aa->j;
  PetscCall(MatGetBlockSize(A,&bs));
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = aa->nz;
    PetscCall(PetscMalloc2(bs2*nz,&row,bs2*nz,&col));
    if (bs>1) {
      PetscCall(PetscMalloc1(bs2*nz,&mumps->val_alloc));
      mumps->val = mumps->val_alloc;
    } else {
      mumps->val = aa->a;
    }
    mumps->irn = row;
    mumps->jcn = col;
  } else {
    if (bs == 1) mumps->val = aa->a;
    row = mumps->irn;
    col = mumps->jcn;
  }
  val = mumps->val;

  nz = 0;
  if (bs>1) {
    for (i=0; i<mbs; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            if (ajj[j]>i || k>=m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                PetscCall(PetscMUMPSIntCast(i*bs + m + shift,&row[nz]));
                PetscCall(PetscMUMPSIntCast(ajj[j]*bs + k + shift,&col[nz]));
              }
              val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs];
            }
          }
        }
      }
    }
  } else if (reuse == MAT_INITIAL_MATRIX) {
    for (i=0; i<mbs; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
        PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[nz]));
        nz++;
      }
    }
    PetscCheck(nz == aa->nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT,nz,aa->nz);
  }
  if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai,*aj,*ajj,*adiag,M=A->rmap->n;
  PetscInt64        nz,rnz,i,j;
  const PetscScalar *av,*v1;
  PetscScalar       *val;
  PetscMUMPSInt     *row,*col;
  Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
  PetscBool         missing;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatGetOption(A,MAT_HERMITIAN,&hermitian));
  PetscCheck(!hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatSeqAIJGetArrayRead(A,&av));
  ai    = aa->i; aj = aa->j;
  adiag = aa->diag;
  PetscCall(MatMissingDiagonal_SeqAIJ(A,&missing,NULL));
  if (reuse == MAT_INITIAL_MATRIX) {
    /* count nz in the upper triangular part of A */
    nz = 0;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i]; j<ai[i+1]; j++) {
            if (aj[j] < i) continue;
            nz++;
          }
        } else {
          nz += ai[i+1] - adiag[i];
        }
      }
    } else {
      for (i=0; i<M; i++) nz += ai[i+1] - adiag[i];
    }
    PetscCall(PetscMalloc2(nz,&row,nz,&col));
    PetscCall(PetscMalloc1(nz,&val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;

    nz = 0;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i]; j<ai[i+1]; j++) {
            if (aj[j] < i) continue;
            PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
            PetscCall(PetscMUMPSIntCast(aj[j]+shift,&col[nz]));
            val[nz] = av[j];
            nz++;
          }
        } else {
          rnz = ai[i+1] - adiag[i];
          ajj = aj + adiag[i];
          v1  = av + adiag[i];
          for (j=0; j<rnz; j++) {
            PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
            PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[nz]));
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i=0; i<M; i++) {
        rnz = ai[i+1] - adiag[i];
        ajj = aj + adiag[i];
        v1  = av + adiag[i];
        for (j=0; j<rnz; j++) {
          PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
          PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[nz]));
          val[nz++] = v1[j];
        }
      }
    }
  } else {
    nz  = 0;
    val = mumps->val;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i]; j<ai[i+1]; j++) {
            if (aj[j] < i) continue;
            val[nz++] = av[j];
          }
        } else {
          rnz = ai[i+1] - adiag[i];
          v1  = av + adiag[i];
          for (j=0; j<rnz; j++) {
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i=0; i<M; i++) {
        rnz = ai[i+1] - adiag[i];
        v1  = av + adiag[i];
        for (j=0; j<rnz; j++) {
          val[nz++] = v1[j];
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(A,&av));
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai,*aj,*bi,*bj,*garray,*ajj,*bjj;
  PetscInt          bs;
  PetscInt64        rstart,nz,i,j,k,m,jj,irow,countA,countB;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av,*bv,*v1,*v2;
  PetscScalar       *val;
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ*)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
  const PetscInt    bs2=aa->bs2,mbs=aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatGetOption(A,MAT_HERMITIAN,&hermitian));
  PetscCheck(!hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatGetBlockSize(A,&bs));
  rstart = A->rmap->rstart;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;
  av = aa->a;
  bv = bb->a;

  garray = mat->garray;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz = (aa->nz+bb->nz)*bs2; /* just a conservative estimate */
    PetscCall(PetscMalloc2(nz,&row,nz,&col));
    PetscCall(PetscMalloc1(nz,&val));
    /* cannot decide the exact mumps->nnz now because of the SBAIJ */
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<mbs; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i]*bs2;
    v2     = bv + bi[i]*bs2;

    if (bs>1) {
      /* A-part */
      for (j=0; j<countA; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            if (rstart + ajj[j]*bs>irow || k>=m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                PetscCall(PetscMUMPSIntCast(irow + m + shift,&row[jj]));
                PetscCall(PetscMUMPSIntCast(rstart + ajj[j]*bs + k + shift,&col[jj]));
              }
              val[jj++] = v1[j*bs2 + m + k*bs];
            }
          }
        }
      }

      /* B-part */
      for (j=0; j<countB; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            if (reuse == MAT_INITIAL_MATRIX) {
              PetscCall(PetscMUMPSIntCast(irow + m + shift,&row[jj]));
              PetscCall(PetscMUMPSIntCast(garray[bjj[j]]*bs + k + shift,&col[jj]));
            }
            val[jj++] = v2[j*bs2 + m + k*bs];
          }
        }
      }
    } else {
      /* A-part */
      for (j=0; j<countA; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
          PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]));
        }
        val[jj++] = v1[j];
      }

      /* B-part */
      for (j=0; j<countB; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
          PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]));
        }
        val[jj++] = v2[j];
      }
    }
    irow += bs;
  }
  mumps->nnz = jj;
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai,*aj,*bi,*bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscInt64        rstart,nz,i,j,jj,irow,countA,countB;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av,*bv,*v1,*v2;
  PetscScalar       *val;
  Mat               Ad,Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray));
  PetscCall(MatSeqAIJGetArrayRead(Ad,&av));
  PetscCall(MatSeqAIJGetArrayRead(Ao,&bv));

  aa = (Mat_SeqAIJ*)(Ad)->data;
  bb = (Mat_SeqAIJ*)(Ao)->data;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
    PetscCall(PetscMalloc2(nz,&row,nz,&col));
    PetscCall(PetscMalloc1(nz,&val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
        PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]));
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j=0; j<countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
        PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]));
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Ad,&av));
  PetscCall(MatSeqAIJRestoreArrayRead(Ao,&bv));
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ*)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
  const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj,*bjj;
  const PetscInt    *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart;
  const PetscInt    bs2=mat->bs2;
  PetscInt          bs;
  PetscInt64        nz,i,j,k,n,jj,irow,countA,countB,idx;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2;
  PetscScalar       *val;

  PetscFunctionBegin;
  PetscCall(MatGetBlockSize(A,&bs));
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = bs2*(aa->nz + bb->nz);
    PetscCall(PetscMalloc2(nz,&row,nz,&col));
    PetscCall(PetscMalloc1(nz,&val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<mbs; i++) {
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    ajj    = aj + ai[i];
    bjj    = bj + bi[i];
    v1     = av + bs2*ai[i];
    v2     = bv + bs2*bi[i];

    idx = 0;
    /* A-part */
    for (k=0; k<countA; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift,&row[jj]));
            PetscCall(PetscMUMPSIntCast(rstart + bs*ajj[k] + j + shift,&col[jj]));
          }
          val[jj++] = v1[idx++];
        }
      }
    }

    idx = 0;
    /* B-part */
    for (k=0; k<countB; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift,&row[jj]));
            PetscCall(PetscMUMPSIntCast(bs*garray[bjj[k]] + j + shift,&col[jj]));
          }
          val[jj++] = v2[idx++];
        }
      }
    }
    irow += bs;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai,*aj,*adiag,*bi,*bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscInt64        rstart,nz,nza,nzb,i,j,jj,irow,countA,countB;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av,*bv,*v1,*v2;
  PetscScalar       *val;
  Mat               Ad,Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatGetOption(A,MAT_HERMITIAN,&hermitian));
  PetscCheck(!hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray));
  PetscCall(MatSeqAIJGetArrayRead(Ad,&av));
  PetscCall(MatSeqAIJGetArrayRead(Ao,&bv));

  aa    = (Mat_SeqAIJ*)(Ad)->data;
  bb    = (Mat_SeqAIJ*)(Ao)->data;
  ai    = aa->i;
  aj    = aa->j;
  adiag = aa->diag;
  bi    = bb->i;
  bj    = bb->j;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
    nzb = 0; /* num of upper triangular entries in mat->B */
    for (i=0; i<m; i++) {
      nza   += (ai[i+1] - adiag[i]);
      countB = bi[i+1] - bi[i];
      bjj    = bj + bi[i];
      for (j=0; j<countB; j++) {
        if (garray[bjj[j]] > rstart) nzb++;
      }
    }

    nz = nza + nzb; /* total nz of upper triangular part of mat */
    PetscCall(PetscMalloc2(nz,&row,nz,&col));
    PetscCall(PetscMalloc1(nz,&val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
    v1     = av + adiag[i];
    countA = ai[i+1] - adiag[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
        PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]));
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j=0; j<countB; j++) {
      if (garray[bjj[j]] > rstart) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
          PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]));
        }
        val[jj++] = v2[j];
      }
    }
    irow++;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Ad,&av));
  PetscCall(MatSeqAIJRestoreArrayRead(Ao,&bv));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS *mumps=(Mat_MUMPS*)A->data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc));
  PetscCall(VecScatterDestroy(&mumps->scat_rhs));
  PetscCall(VecScatterDestroy(&mumps->scat_sol));
  PetscCall(VecDestroy(&mumps->b_seq));
  PetscCall(VecDestroy(&mumps->x_seq));
  PetscCall(PetscFree(mumps->id.perm_in));
  PetscCall(PetscFree2(mumps->irn,mumps->jcn));
  PetscCall(PetscFree(mumps->val_alloc));
  PetscCall(PetscFree(mumps->info));
  PetscCall(MatMumpsResetSchur_Private(mumps));
  mumps->id.job = JOB_END;
  PetscMUMPS_c(mumps);
  PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d",mumps->id.INFOG(1));
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  if (mumps->use_petsc_omp_support) {
    PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
    PetscCall(PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf));
    PetscCall(PetscFree3(mumps->rhs_nrow,mumps->rhs_recvcounts,mumps->rhs_disps));
  }
#endif
  PetscCall(PetscFree(mumps->ia_alloc));
  PetscCall(PetscFree(mumps->ja_alloc));
  PetscCall(PetscFree(mumps->recvcount));
  PetscCall(PetscFree(mumps->reqs));
  PetscCall(PetscFree(mumps->irhs_loc));
  if (mumps->mumps_comm != MPI_COMM_NULL) PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A),&mumps->mumps_comm));
  PetscCall(PetscFree(A->data));

  /* clear composed functions */
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL));
  PetscFunctionReturn(0);
}
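/* Illustrative note (not code used here): with ICNTL(20)=10 each rank hands MUMPS its own block of
   dense right-hand sides instead of centralizing them on rank 0. In the simplest case (no PETSc
   OpenMP support), a rank owning rows [rstart, rstart+m) ends up with

     id.nloc_rhs = m;  id.lrhs_loc = m;
     id.irhs_loc = {rstart+1, ..., rstart+m};   // 1-based global row indices
     id.rhs_loc  = pointer to the local rows of b;

   which is exactly what MatMumpsSetUpDistRHSInfo() below assembles. */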
/* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */
static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A,PetscInt nrhs,const PetscScalar *array)
{
  Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
  const PetscMPIInt ompsize=mumps->omp_comm_size;
  PetscInt          i,m,M,rstart;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A,&M,NULL));
  PetscCall(MatGetLocalSize(A,&m,NULL));
  PetscCheck(M <= PETSC_MUMPS_INT_MAX,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
  if (ompsize == 1) {
    if (!mumps->irhs_loc) {
      mumps->nloc_rhs = m;
      PetscCall(PetscMalloc1(m,&mumps->irhs_loc));
      PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
      for (i=0; i<m; i++) mumps->irhs_loc[i] = rstart+i+1; /* use 1-based indices */
    }
    mumps->id.rhs_loc = (MumpsScalar*)array;
  } else {
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
    const PetscInt *ranges;
    PetscMPIInt    j,k,sendcount,*petsc_ranks,*omp_ranks;
    MPI_Group      petsc_group,omp_group;
    PetscScalar    *recvbuf=NULL;

    if (mumps->is_omp_master) {
      /* Lazily initialize the omp stuff for distributed rhs */
      if (!mumps->irhs_loc) {
        PetscCall(PetscMalloc2(ompsize,&omp_ranks,ompsize,&petsc_ranks));
        PetscCall(PetscMalloc3(ompsize,&mumps->rhs_nrow,ompsize,&mumps->rhs_recvcounts,ompsize,&mumps->rhs_disps));
        PetscCallMPI(MPI_Comm_group(mumps->petsc_comm,&petsc_group));
        PetscCallMPI(MPI_Comm_group(mumps->omp_comm,&omp_group));
        for (j=0; j<ompsize; j++) omp_ranks[j] = j;
        PetscCallMPI(MPI_Group_translate_ranks(omp_group,ompsize,omp_ranks,petsc_group,petsc_ranks));

        /* Populate mumps->irhs_loc[], rhs_nrow[] */
        mumps->nloc_rhs = 0;
        PetscCall(MatGetOwnershipRanges(A,&ranges));
        for (j=0; j<ompsize; j++) {
          mumps->rhs_nrow[j] = ranges[petsc_ranks[j]+1] - ranges[petsc_ranks[j]];
          mumps->nloc_rhs   += mumps->rhs_nrow[j];
        }
        PetscCall(PetscMalloc1(mumps->nloc_rhs,&mumps->irhs_loc));
        for (j=k=0; j<ompsize; j++) {
          for (i=ranges[petsc_ranks[j]]; i<ranges[petsc_ranks[j]+1]; i++,k++) mumps->irhs_loc[k] = i+1; /* uses 1-based indices */
        }

        PetscCall(PetscFree2(omp_ranks,petsc_ranks));
        PetscCallMPI(MPI_Group_free(&petsc_group));
        PetscCallMPI(MPI_Group_free(&omp_group));
      }

      /* Realloc buffers when current nrhs is bigger than what we have met */
      if (nrhs > mumps->max_nrhs) {
        PetscCall(PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf));
        PetscCall(PetscMalloc2(mumps->nloc_rhs*nrhs,&mumps->rhs_loc,mumps->nloc_rhs*nrhs,&mumps->rhs_recvbuf));
        mumps->max_nrhs = nrhs;
      }

      /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
      for (j=0; j<ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j]*nrhs,&mumps->rhs_recvcounts[j]));
      mumps->rhs_disps[0] = 0;
      for (j=1; j<ompsize; j++) {
        mumps->rhs_disps[j] = mumps->rhs_disps[j-1] + mumps->rhs_recvcounts[j-1];
        PetscCheck(mumps->rhs_disps[j] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscMPIInt overflow!");
      }
      recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
    }

    PetscCall(PetscMPIIntCast(m*nrhs,&sendcount));
    PetscCallMPI(MPI_Gatherv(array,sendcount,MPIU_SCALAR,recvbuf,mumps->rhs_recvcounts,mumps->rhs_disps,MPIU_SCALAR,0,mumps->omp_comm));

    if (mumps->is_omp_master) {
      if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
        PetscScalar *dst,*dstbase = mumps->rhs_loc;
        for (j=0; j<ompsize; j++) {
          const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
          dst = dstbase;
          for (i=0; i<nrhs; i++) {
            PetscCall(PetscArraycpy(dst,src,mumps->rhs_nrow[j]));
            src += mumps->rhs_nrow[j];
            dst += mumps->nloc_rhs;
          }
          dstbase += mumps->rhs_nrow[j];
        }
      }
      mumps->id.rhs_loc = (MumpsScalar*)mumps->rhs_loc;
    }
#endif /* PETSC_HAVE_OPENMP_SUPPORT */
  }
  mumps->id.nrhs     = nrhs;
  mumps->id.nloc_rhs = mumps->nloc_rhs;
  mumps->id.lrhs_loc = mumps->nloc_rhs;
  mumps->id.irhs_loc = mumps->irhs_loc;
  PetscFunctionReturn(0);
}
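/* Usage reminder (illustrative, assumes a PETSc build configured with MUMPS): the solver below is
   typically selected from the command line, e.g.

     ./app -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps \
           -mat_mumps_icntl_7 2 -mat_mumps_icntl_4 1

   which routes KSPSolve() through MatSolve_MUMPS() defined next. */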
PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
  const PetscScalar *rarray = NULL;
  PetscScalar       *array;
  IS                is_iden,is_petsc;
  PetscInt          i;
  PetscBool         second_solve = PETSC_FALSE;
  static PetscBool  cite1 = PETSC_FALSE,cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1));
  PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",&cite2));

  if (A->factorerrortype) {
    PetscCall(PetscInfo(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
    PetscCall(VecSetInf(x));
    PetscFunctionReturn(0);
  }

  mumps->id.nrhs = 1;
  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      PetscCall(VecGetArrayRead(b,&rarray));
      PetscCall(MatMumpsSetUpDistRHSInfo(A,1,rarray));
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS; scatter b into a sequential rhs vector */
      PetscCall(VecScatterBegin(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD));
      if (!mumps->myid) {
        PetscCall(VecGetArray(mumps->b_seq,&array));
        mumps->id.rhs = (MumpsScalar*)array;
      }
    }
  } else { /* petsc_size == 1 */
    mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
    PetscCall(VecCopy(b,x));
    PetscCall(VecGetArray(x,&array));
    mumps->id.rhs = (MumpsScalar*)array;
  }

  /*
     handle condensation step of Schur complement (if any)
     We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
     According to the MUMPS (5.0.0) manual, any other value should be harmful during the factorization phase.
     Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
     This requires an extra call to PetscMUMPS_c and the computation of the factors for S.
  */
  if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
    PetscCheck(mumps->petsc_size <= 1,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc");
    second_solve = PETSC_TRUE;
    PetscCall(MatMumpsHandleSchur_Private(A,PETSC_FALSE));
  }
  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));

  /* handle expansion step of Schur complement (if any) */
  if (second_solve) {
    PetscCall(MatMumpsHandleSchur_Private(A,PETSC_TRUE));
  }

  if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
    if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
      /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), so recreate scat_sol */
      PetscCall(VecScatterDestroy(&mumps->scat_sol));
    }
    if (!mumps->scat_sol) { /* create scatter scat_sol */
      PetscInt *isol2_loc=NULL;
      PetscCall(ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden)); /* from */
      PetscCall(PetscMalloc1(mumps->id.lsol_loc,&isol2_loc));
      for (i=0; i<mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i]-1; /* change Fortran style to C style */
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,isol2_loc,PETSC_OWN_POINTER,&is_petsc)); /* to */
      PetscCall(VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol));
      PetscCall(ISDestroy(&is_iden));
      PetscCall(ISDestroy(&is_petsc));
      mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
    }

    PetscCall(VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD));
  }

  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      PetscCall(VecRestoreArrayRead(b,&rarray));
    } else if (!mumps->myid) {
      PetscCall(VecRestoreArray(mumps->b_seq,&array));
    }
  } else PetscCall(VecRestoreArray(x,&array));

  PetscCall(PetscLogFlops(2.0*mumps->id.RINFO(3)));
  PetscFunctionReturn(0);
}

PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS *mumps=(Mat_MUMPS*)A->data;

  PetscFunctionBegin;
  mumps->id.ICNTL(9) = 0;
  PetscCall(MatSolve_MUMPS(A,b,x));
  mumps->id.ICNTL(9) = 1;
  PetscFunctionReturn(0);
}
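/* Illustrative sketch (not code used here) of the two multi-RHS entry points implemented below:

     MatMatSolve(F,B,X);            // B, X dense (MATDENSE); B may also be a MatCreateTranspose() wrapper of an AIJ matrix
     MatMatTransposeSolve(F,Bt,X);  // Bt is AIJ and holds the right-hand sides by rows; MUMPS wants compressed columns

   Both end up in MatMatSolve_MUMPS() with either a dense or a sparse centralized RHS. */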
PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X)
{
  Mat               Bt = NULL;
  PetscBool         denseX,denseB,flg,flgT;
  Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
  PetscInt          i,nrhs,M;
  PetscScalar       *array;
  const PetscScalar *rbray;
  PetscInt          lsol_loc,nlsol_loc,*idxx,iidx = 0;
  PetscMUMPSInt     *isol_loc,*isol_loc_save;
  PetscScalar       *bray,*sol_loc,*sol_loc_save;
  IS                is_to,is_from;
  PetscInt          k,proc,j,m,myrstart;
  const PetscInt    *rstart;
  Vec               v_mpi,msol_loc;
  VecScatter        scat_sol;
  Vec               b_seq;
  VecScatter        scat_rhs;
  PetscScalar       *aa;
  PetscInt          spnr,*ia,*ja;
  Mat_MPIAIJ        *b = NULL;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)X,&denseX,MATSEQDENSE,MATMPIDENSE,NULL));
  PetscCheck(denseX,PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");

  PetscCall(PetscObjectTypeCompareAny((PetscObject)B,&denseB,MATSEQDENSE,MATMPIDENSE,NULL));
  if (denseB) {
    PetscCheck(B->rmap->n == X->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution");
    mumps->id.ICNTL(20) = 0; /* dense RHS */
  } else { /* sparse B */
    PetscCheck(X != B,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_IDN,"X and B must be different matrices");
    PetscCall(PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT));
    if (flgT) { /* input B is the transpose of the actual RHS matrix,
                   because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
      PetscCall(MatTransposeGetMat(B,&Bt));
    } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix");
    mumps->id.ICNTL(20) = 1; /* sparse RHS */
  }

  PetscCall(MatGetSize(B,&M,&nrhs));
  mumps->id.nrhs = nrhs;
  mumps->id.lrhs = M;
  mumps->id.rhs  = NULL;

  if (mumps->petsc_size == 1) {
    PetscScalar *aa;
    PetscInt    spnr,*ia,*ja;
    PetscBool   second_solve = PETSC_FALSE;

    PetscCall(MatDenseGetArray(X,&array));
    mumps->id.rhs = (MumpsScalar*)array;

    if (denseB) {
      /* copy B to X */
      PetscCall(MatDenseGetArrayRead(B,&rbray));
      PetscCall(PetscArraycpy(array,rbray,M*nrhs));
      PetscCall(MatDenseRestoreArrayRead(B,&rbray));
    } else { /* sparse B */
      PetscCall(MatSeqAIJGetArray(Bt,&aa));
      PetscCall(MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
      PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs));
      mumps->id.rhs_sparse = (MumpsScalar*)aa;
    }
    /* handle condensation step of Schur complement (if any) */
    if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
      second_solve = PETSC_TRUE;
      PetscCall(MatMumpsHandleSchur_Private(A,PETSC_FALSE));
    }
    /* solve phase */
    /*-------------*/
    mumps->id.job = JOB_SOLVE;
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));

    /* handle expansion step of Schur complement (if any) */
    if (second_solve) {
      PetscCall(MatMumpsHandleSchur_Private(A,PETSC_TRUE));
    }
    if (!denseB) { /* sparse B */
      PetscCall(MatSeqAIJRestoreArray(Bt,&aa));
      PetscCall(MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
      PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
    PetscCall(MatDenseRestoreArray(X,&array));
    PetscFunctionReturn(0);
  }

  /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/
  PetscCheckFalse(mumps->petsc_size > 1 && mumps->id.ICNTL(19),PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc");

  /* create msol_loc to hold the mumps local solution */
  isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
  sol_loc_save  = (PetscScalar*)mumps->id.sol_loc;

  lsol_loc  = mumps->id.lsol_loc;
  nlsol_loc = nrhs*lsol_loc; /* length of sol_loc */
  PetscCall(PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc));
  mumps->id.sol_loc  = (MumpsScalar*)sol_loc;
  mumps->id.isol_loc = isol_loc;

  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc));

  if (denseB) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      PetscCall(MatDenseGetArrayRead(B,&rbray));
      PetscCall(MatMumpsSetUpDistRHSInfo(A,nrhs,rbray));
      PetscCall(MatDenseRestoreArrayRead(B,&rbray));
      PetscCall(MatGetLocalSize(B,&m,NULL));
      PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,NULL,&v_mpi));
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
      /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in
         very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank
         0, re-arrange B into the desired order, which is a local operation.
       */

      /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
      /* wrap dense rhs matrix B into a vector v_mpi */
      PetscCall(MatGetLocalSize(B,&m,NULL));
      PetscCall(MatDenseGetArray(B,&bray));
      PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi));
      PetscCall(MatDenseRestoreArray(B,&bray));

      /* scatter v_mpi to b_seq on proc[0]. MUMPS requires rhs to be centralized on the host! */
      if (!mumps->myid) {
        PetscInt *idx;
        /* idx: maps from the k-th index of v_mpi to the (i,j)-th global entry of B */
        PetscCall(PetscMalloc1(nrhs*M,&idx));
        PetscCall(MatGetOwnershipRanges(B,&rstart));
        k = 0;
        for (proc=0; proc<mumps->petsc_size; proc++) {
          for (j=0; j<nrhs; j++) {
            for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i;
          }
        }

        PetscCall(VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq));
        PetscCall(ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to));
        PetscCall(ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from));
      } else {
        PetscCall(VecCreateSeq(PETSC_COMM_SELF,0,&b_seq));
        PetscCall(ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to));
        PetscCall(ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from));
      }
      PetscCall(VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs));
      PetscCall(VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(ISDestroy(&is_to));
      PetscCall(ISDestroy(&is_from));
      PetscCall(VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD));

      if (!mumps->myid) { /* define rhs on the host */
        PetscCall(VecGetArray(b_seq,&bray));
        mumps->id.rhs = (MumpsScalar*)bray;
        PetscCall(VecRestoreArray(b_seq,&bray));
      }
    }
  } else { /* sparse B */
    b = (Mat_MPIAIJ*)Bt->data;

    /* wrap dense X into a vector v_mpi */
    PetscCall(MatGetLocalSize(X,&m,NULL));
    PetscCall(MatDenseGetArray(X,&bray));
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi));
    PetscCall(MatDenseRestoreArray(X,&bray));

    if (!mumps->myid) {
      PetscCall(MatSeqAIJGetArray(b->A,&aa));
      PetscCall(MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
      PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs));
      mumps->id.rhs_sparse = (MumpsScalar*)aa;
    } else {
      mumps->id.irhs_ptr    = NULL;
      mumps->id.irhs_sparse = NULL;
      mumps->id.nz_rhs      = 0;
      mumps->id.rhs_sparse  = NULL;
    }
  }

  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));

  /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
  PetscCall(MatDenseGetArray(X,&array));
  PetscCall(VecPlaceArray(v_mpi,array));

  /* create scatter scat_sol */
  PetscCall(MatGetOwnershipRanges(X,&rstart));
  /* iidx: index for scattering the mumps solution to petsc X */

  PetscCall(ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from));
  PetscCall(PetscMalloc1(nlsol_loc,&idxx));
  for (i=0; i<lsol_loc; i++) {
    isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in the j-th vector */

    for (proc=0; proc<mumps->petsc_size; proc++) {
      if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) {
        myrstart = rstart[proc];
        k        = isol_loc[i] - myrstart;        /* local index in the 1st column of petsc vector X */
        iidx     = k + myrstart*nrhs;             /* maps mumps isol_loc[i] to the petsc index in X */
        m        = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */
        break;
      }
    }

    for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m;
  }
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to));
  PetscCall(VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol));
  PetscCall(VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall(ISDestroy(&is_from));
  PetscCall(ISDestroy(&is_to));
  PetscCall(VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall(MatDenseRestoreArray(X,&array));

  /* free spaces */
  mumps->id.sol_loc  = (MumpsScalar*)sol_loc_save;
  mumps->id.isol_loc = isol_loc_save;

  PetscCall(PetscFree2(sol_loc,isol_loc));
  PetscCall(PetscFree(idxx));
  PetscCall(VecDestroy(&msol_loc));
  PetscCall(VecDestroy(&v_mpi));
  if (!denseB) {
    if (!mumps->myid) {
      b = (Mat_MPIAIJ*)Bt->data;
      PetscCall(MatSeqAIJRestoreArray(b->A,&aa));
      PetscCall(MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
      PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
  } else {
    if (mumps->ICNTL20 == 0) {
      PetscCall(VecDestroy(&b_seq));
      PetscCall(VecScatterDestroy(&scat_rhs));
    }
  }
  PetscCall(VecScatterDestroy(&scat_sol));
  PetscCall(PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3)));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X)
{
  PetscBool flg;
  Mat       B;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL));
  PetscCheck(flg,PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix");

  /* Create B=Bt^T that uses Bt's data structure */
  PetscCall(MatCreateTranspose(Bt,&B));

  PetscCall(MatMatSolve_MUMPS(A,B,X));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(0);
}
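/* Illustrative use of the inertia query below (real builds only), e.g. inside a spectrum-slicing loop:

     PetscInt nneg,nzero,npos;
     PetscCall(MatGetInertia(F,&nneg,&nzero,&npos));  // F is a MUMPS Cholesky factor

   which dispatches to MatGetInertia_SBAIJMUMPS() and reads INFOG(12)/INFOG(28). */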
-mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia",mumps->id.INFOG(13)); 1505 1506 if (nneg) *nneg = mumps->id.INFOG(12); 1507 if (nzero || npos) { 1508 PetscCheck(mumps->id.ICNTL(24) == 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1509 if (nzero) *nzero = mumps->id.INFOG(28); 1510 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1511 } 1512 PetscFunctionReturn(0); 1513 } 1514 #endif 1515 1516 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps) 1517 { 1518 PetscInt i,nreqs; 1519 PetscMUMPSInt *irn,*jcn; 1520 PetscMPIInt count; 1521 PetscInt64 totnnz,remain; 1522 const PetscInt osize=mumps->omp_comm_size; 1523 PetscScalar *val; 1524 1525 PetscFunctionBegin; 1526 if (osize > 1) { 1527 if (reuse == MAT_INITIAL_MATRIX) { 1528 /* master first gathers counts of nonzeros to receive */ 1529 if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize,&mumps->recvcount)); 1530 PetscCallMPI(MPI_Gather(&mumps->nnz,1,MPIU_INT64,mumps->recvcount,1,MPIU_INT64,0/*master*/,mumps->omp_comm)); 1531 1532 /* Then each computes number of send/recvs */ 1533 if (mumps->is_omp_master) { 1534 /* Start from 1 since self communication is not done in MPI */ 1535 nreqs = 0; 1536 for (i=1; i<osize; i++) nreqs += (mumps->recvcount[i]+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX; 1537 } else { 1538 nreqs = (mumps->nnz+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX; 1539 } 1540 PetscCall(PetscMalloc1(nreqs*3,&mumps->reqs)); /* Triple the requests since we send irn, jcn and val seperately */ 1541 1542 /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 1543 MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 1544 might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size 1545 is very small, the current approach should have no extra overhead compared to MPI_Gatherv. 
1546 */ 1547 nreqs = 0; /* counter for actual send/recvs */ 1548 if (mumps->is_omp_master) { 1549 for (i=0,totnnz=0; i<osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1550 PetscCall(PetscMalloc2(totnnz,&irn,totnnz,&jcn)); 1551 PetscCall(PetscMalloc1(totnnz,&val)); 1552 1553 /* Self communication */ 1554 PetscCall(PetscArraycpy(irn,mumps->irn,mumps->nnz)); 1555 PetscCall(PetscArraycpy(jcn,mumps->jcn,mumps->nnz)); 1556 PetscCall(PetscArraycpy(val,mumps->val,mumps->nnz)); 1557 1558 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1559 PetscCall(PetscFree2(mumps->irn,mumps->jcn)); 1560 PetscCall(PetscFree(mumps->val_alloc)); 1561 mumps->nnz = totnnz; 1562 mumps->irn = irn; 1563 mumps->jcn = jcn; 1564 mumps->val = mumps->val_alloc = val; 1565 1566 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1567 jcn += mumps->recvcount[0]; 1568 val += mumps->recvcount[0]; 1569 1570 /* Remote communication */ 1571 for (i=1; i<osize; i++) { 1572 count = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX); 1573 remain = mumps->recvcount[i] - count; 1574 while (count>0) { 1575 PetscCallMPI(MPI_Irecv(irn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1576 PetscCallMPI(MPI_Irecv(jcn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1577 PetscCallMPI(MPI_Irecv(val,count,MPIU_SCALAR, i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1578 irn += count; 1579 jcn += count; 1580 val += count; 1581 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1582 remain -= count; 1583 } 1584 } 1585 } else { 1586 irn = mumps->irn; 1587 jcn = mumps->jcn; 1588 val = mumps->val; 1589 count = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX); 1590 remain = mumps->nnz - count; 1591 while (count>0) { 1592 PetscCallMPI(MPI_Isend(irn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1593 PetscCallMPI(MPI_Isend(jcn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1594 PetscCallMPI(MPI_Isend(val,count,MPIU_SCALAR, 0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1595 irn += count; 1596 jcn += count; 1597 val += count; 1598 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1599 remain -= count; 1600 } 1601 } 1602 } else { 1603 nreqs = 0; 1604 if (mumps->is_omp_master) { 1605 val = mumps->val + mumps->recvcount[0]; 1606 for (i=1; i<osize; i++) { /* Remote communication only since self data is already in place */ 1607 count = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX); 1608 remain = mumps->recvcount[i] - count; 1609 while (count>0) { 1610 PetscCallMPI(MPI_Irecv(val,count,MPIU_SCALAR,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1611 val += count; 1612 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1613 remain -= count; 1614 } 1615 } 1616 } else { 1617 val = mumps->val; 1618 count = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX); 1619 remain = mumps->nnz - count; 1620 while (count>0) { 1621 PetscCallMPI(MPI_Isend(val,count,MPIU_SCALAR,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1622 val += count; 1623 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1624 remain -= count; 1625 } 1626 } 1627 } 1628 PetscCallMPI(MPI_Waitall(nreqs,mumps->reqs,MPI_STATUSES_IGNORE)); 1629 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1630 } 1631 PetscFunctionReturn(0); 1632 } 1633 1634 PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info) 1635 { 1636 Mat_MUMPS *mumps =(Mat_MUMPS*)(F)->data; 1637 PetscBool isMPIAIJ; 1638 1639 
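/*
   Illustrative sketch (not part of this interface): one way this routine, MatFactorNumeric_MUMPS(), is reached from
   user code. The names A, b, x are assumed to be an already assembled MATAIJ matrix and two conforming vectors;
   error handling and parallel details are omitted. The same path is taken implicitly when running a KSP with
   -pc_type lu -pc_factor_mat_solver_type mumps.

     Mat           F;
     IS            rperm,cperm;
     MatFactorInfo finfo;
     PetscCall(MatGetOrdering(A,MATORDERINGNATURAL,&rperm,&cperm));
     PetscCall(MatFactorInfoInitialize(&finfo));
     PetscCall(MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F));
     PetscCall(MatLUFactorSymbolic(F,A,rperm,cperm,&finfo));  // analysis phase, JOB_FACTSYMBOLIC
     PetscCall(MatLUFactorNumeric(F,A,&finfo));               // this routine, JOB_FACTNUMERIC
     PetscCall(MatSolve(F,b,x));                              // solve phase, JOB_SOLVE
     PetscCall(ISDestroy(&rperm));
     PetscCall(ISDestroy(&cperm));
     PetscCall(MatDestroy(&F));
*/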
PetscFunctionBegin;
1640 if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) {
1641 if (mumps->id.INFOG(1) == -6) {
1642 PetscCall(PetscInfo(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1643 }
1644 PetscCall(PetscInfo(A,"MatFactorNumeric is called after the analysis phase failed, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1645 PetscFunctionReturn(0);
1646 }
1647
1648 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
1649 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps));
1650
1651 /* numerical factorization phase */
1652 /*-------------------------------*/
1653 mumps->id.job = JOB_FACTNUMERIC;
1654 if (!mumps->id.ICNTL(18)) { /* A is centralized */
1655 if (!mumps->myid) {
1656 mumps->id.a = (MumpsScalar*)mumps->val;
1657 }
1658 } else {
1659 mumps->id.a_loc = (MumpsScalar*)mumps->val;
1660 }
1661 PetscMUMPS_c(mumps);
1662 if (mumps->id.INFOG(1) < 0) {
1663 if (A->erroriffailure) {
1664 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d",mumps->id.INFOG(1),mumps->id.INFO(2));
1665 } else {
1666 if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
1667 PetscCall(PetscInfo(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1668 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1669 } else if (mumps->id.INFOG(1) == -13) {
1670 PetscCall(PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1671 F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1672 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
1673 PetscCall(PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with work array\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1674 F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1675 } else {
1676 PetscCall(PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1677 F->factorerrortype = MAT_FACTOR_OTHER;
1678 }
1679 }
1680 }
1681 PetscCheckFalse(!mumps->myid && mumps->id.ICNTL(16) > 0,PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d",mumps->id.INFOG(16));
1682
1683 F->assembled = PETSC_TRUE;
1684
1685 if (F->schur) { /* reset Schur status to unfactored */
1686 #if defined(PETSC_HAVE_CUDA)
1687 F->schur->offloadmask = PETSC_OFFLOAD_CPU;
1688 #endif
1689 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
1690 mumps->id.ICNTL(19) = 2;
1691 PetscCall(MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur));
1692 }
1693 PetscCall(MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED));
1694 }
1695
1696 /* just to be sure that the ICNTL(19) value returned by a call to MatMumpsGetIcntl is always consistent */
1697 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
1698
1699 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
1700 if (mumps->petsc_size > 1) {
1701 PetscInt lsol_loc;
1702 PetscScalar *sol_loc;
1703
1704 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ));
1705
1706 /* distributed solution; Create x_seq=sol_loc for repeated use */
1707 if (mumps->x_seq) {
1708 PetscCall(VecScatterDestroy(&mumps->scat_sol));
1709
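/*
   Explanatory note (hedged paraphrase of the MUMPS distributed-solution convention, see the MUMPS Users' Guide):
   with ICNTL(21)=1 (set in PetscInitializeMUMPS for the parallel case), MUMPS returns each rank's share of the
   solution in sol_loc[] together with 1-based global indices isol_loc[]; their length INFO(23) can change with
   every new factorization, so the old buffers are released here and reallocated below with the new length.
   Conceptually, for one right-hand side the scatter built in MatSolve_MUMPS applies

     for (k = 0; k < lsol_loc; k++) x[isol_loc[k] - 1] = sol_loc[k];   // x in PETSc's global numbering

   across all ranks.
*/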
PetscCall(PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc)); 1710 PetscCall(VecDestroy(&mumps->x_seq)); 1711 } 1712 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1713 PetscCall(PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc)); 1714 mumps->id.lsol_loc = lsol_loc; 1715 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1716 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq)); 1717 } 1718 PetscCall(PetscLogFlops(mumps->id.RINFO(2))); 1719 PetscFunctionReturn(0); 1720 } 1721 1722 /* Sets MUMPS options from the options database */ 1723 PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A) 1724 { 1725 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1726 PetscMUMPSInt icntl=0; 1727 PetscInt info[80],i,ninfo=80; 1728 PetscBool flg=PETSC_FALSE; 1729 1730 PetscFunctionBegin; 1731 PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat"); 1732 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg)); 1733 if (flg) mumps->id.ICNTL(1) = icntl; 1734 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg)); 1735 if (flg) mumps->id.ICNTL(2) = icntl; 1736 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg)); 1737 if (flg) mumps->id.ICNTL(3) = icntl; 1738 1739 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg)); 1740 if (flg) mumps->id.ICNTL(4) = icntl; 1741 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 1742 1743 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg)); 1744 if (flg) mumps->id.ICNTL(6) = icntl; 1745 1746 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis. 
0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)","None",mumps->id.ICNTL(7),&icntl,&flg)); 1747 if (flg) { 1748 PetscCheckFalse(icntl == 1 || icntl < 0 || icntl > 7,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 1749 mumps->id.ICNTL(7) = icntl; 1750 } 1751 1752 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL)); 1753 /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 1754 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL)); 1755 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL)); 1756 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL)); 1757 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL)); 1758 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL)); 1759 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL)); 1760 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 1761 PetscCall(MatDestroy(&F->schur)); 1762 PetscCall(MatMumpsResetSchur_Private(mumps)); 1763 } 1764 1765 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 1766 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 1767 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 1768 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 1769 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 1770 In short, we could not use distributed RHS with MPICH until v4.0b1. 1771 */ 1772 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101)) 1773 mumps->ICNTL20 = 0; /* Centralized dense RHS*/ 1774 #else 1775 mumps->ICNTL20 = 10; /* Distributed dense RHS*/ 1776 #endif 1777 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20","ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides","None",mumps->ICNTL20,&mumps->ICNTL20,&flg)); 1778 PetscCheckFalse(flg && mumps->ICNTL20 != 10 && mumps->ICNTL20 != 0,PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. 
Allowed values are 0, 10",(int)mumps->ICNTL20); 1779 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0) 1780 PetscCheckFalse(flg && mumps->ICNTL20 == 10,PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=10 is not supported before MUMPS-5.3.0"); 1781 #endif 1782 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */ 1783 1784 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL)); 1785 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL)); 1786 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL)); 1787 if (mumps->id.ICNTL(24)) { 1788 mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ 1789 } 1790 1791 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL)); 1792 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL)); 1793 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27","ICNTL(27): controls the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL)); 1794 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL)); 1795 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL)); 1796 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */ 1797 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL)); 1798 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */ 1799 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL)); 1800 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL)); 1801 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL)); 1802 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL)); 1803 1804 PetscCall(PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting 
threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL)); 1805 PetscCall(PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL)); 1806 PetscCall(PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL)); 1807 PetscCall(PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL)); 1808 PetscCall(PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL)); 1809 PetscCall(PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL)); 1810 1811 PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL)); 1812 1813 PetscCall(PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL)); 1814 if (ninfo) { 1815 PetscCheck(ninfo <= 80,PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %" PetscInt_FMT " must <= 80",ninfo); 1816 PetscCall(PetscMalloc1(ninfo,&mumps->info)); 1817 mumps->ninfo = ninfo; 1818 for (i=0; i<ninfo; i++) { 1819 PetscCheckFalse(info[i] < 0 || info[i]>80,PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %" PetscInt_FMT " must between 1 and 80",ninfo); 1820 else mumps->info[i] = info[i]; 1821 } 1822 } 1823 1824 PetscOptionsEnd(); 1825 PetscFunctionReturn(0); 1826 } 1827 1828 PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps) 1829 { 1830 PetscInt nthreads=0; 1831 1832 PetscFunctionBegin; 1833 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1834 PetscCallMPI(MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size)); 1835 PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm,&mumps->myid));/* "if (!myid)" still works even if mumps_comm is different */ 1836 1837 PetscCall(PetscOptionsHasName(NULL,((PetscObject)A)->prefix,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support)); 1838 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1839 PetscCall(PetscOptionsGetInt(NULL,((PetscObject)A)->prefix,"-mat_mumps_use_omp_threads",&nthreads,NULL)); 1840 if (mumps->use_petsc_omp_support) { 1841 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1842 PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl)); 1843 PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master)); 1844 #else 1845 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",((PetscObject)A)->prefix?((PetscObject)A)->prefix:""); 1846 #endif 1847 } else { 1848 mumps->omp_comm = PETSC_COMM_SELF; 1849 mumps->mumps_comm = mumps->petsc_comm; 1850 mumps->is_omp_master = PETSC_TRUE; 1851 } 1852 PetscCallMPI(MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size)); 1853 mumps->reqs = NULL; 1854 mumps->tag = 0; 1855 1856 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. 
*/
1857 if (mumps->mumps_comm != MPI_COMM_NULL) {
1858 PetscCall(PetscCommGetComm(PetscObjectComm((PetscObject)A),&mumps->mumps_comm));
1859 }
1860
1861 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
1862 mumps->id.job = JOB_INIT;
1863 mumps->id.par = 1; /* host participates in factorization and solve */
1864 mumps->id.sym = mumps->sym;
1865
1866 PetscMUMPS_c(mumps);
1867 PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in PetscInitializeMUMPS: INFOG(1)=%d",mumps->id.INFOG(1));
1868
1869 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
1870 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
1871 */
1872 PetscCallMPI(MPI_Bcast(mumps->id.icntl,40,MPI_INT, 0,mumps->omp_comm));
1873 PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm));
1874
1875 mumps->scat_rhs = NULL;
1876 mumps->scat_sol = NULL;
1877
1878 /* set PETSc-MUMPS default options - override MUMPS default */
1879 mumps->id.ICNTL(3) = 0;
1880 mumps->id.ICNTL(4) = 0;
1881 if (mumps->petsc_size == 1) {
1882 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
1883 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */
1884 } else {
1885 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
1886 mumps->id.ICNTL(21) = 1; /* distributed solution */
1887 }
1888
1889 /* schur */
1890 mumps->id.size_schur = 0;
1891 mumps->id.listvar_schur = NULL;
1892 mumps->id.schur = NULL;
1893 mumps->sizeredrhs = 0;
1894 mumps->schur_sol = NULL;
1895 mumps->schur_sizesol = 0;
1896 PetscFunctionReturn(0);
1897 }
1898
1899 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps)
1900 {
1901 PetscFunctionBegin;
1902 if (mumps->id.INFOG(1) < 0) {
1903 if (A->erroriffailure) {
1904 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d",mumps->id.INFOG(1));
1905 } else {
1906 if (mumps->id.INFOG(1) == -6) {
1907 PetscCall(PetscInfo(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1908 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
1909 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
1910 PetscCall(PetscInfo(F,"problem with workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1911 F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1912 } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) {
1913 PetscCall(PetscInfo(F,"Empty matrix\n"));
1914 } else {
1915 PetscCall(PetscInfo(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1916 F->factorerrortype = MAT_FACTOR_OTHER;
1917 }
1918 }
1919 }
1920 PetscFunctionReturn(0);
1921 }
1922
1923 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1924 {
1925 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data;
1926 Vec b;
1927 const PetscInt M = A->rmap->N;
1928
1929 PetscFunctionBegin;
1930 if (mumps->matstruc == SAME_NONZERO_PATTERN) {
1931 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
1932 PetscFunctionReturn(0);
1933 }
1934
1935 /* Set MUMPS options from the options database */
1936 PetscCall(PetscSetMUMPSFromOptions(F,A));
1937
1938 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
1939
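/*
   Hedged illustration of the triplet format that ConvertToTriples() just produced (the 2x2 matrix below is a
   made-up example, not anything computed here): MUMPS takes the matrix as 1-based (i,j,v) triplets. For

       A = [2 0]
           [1 3]

   a centralized input (ICNTL(18)=0, provided by rank 0 only) would be

       irn = {1,2,2}, jcn = {1,1,2}, val = {2.0,1.0,3.0}, nnz = 3

   whereas with distributed input (ICNTL(18)=3) every rank passes only the triplets of its own rows via
   irn_loc/jcn_loc/a_loc and nnz_loc, which is what the switch below hooks up.
*/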
PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps)); 1940 1941 /* analysis phase */ 1942 /*----------------*/ 1943 mumps->id.job = JOB_FACTSYMBOLIC; 1944 mumps->id.n = M; 1945 switch (mumps->id.ICNTL(18)) { 1946 case 0: /* centralized assembled matrix input */ 1947 if (!mumps->myid) { 1948 mumps->id.nnz = mumps->nnz; 1949 mumps->id.irn = mumps->irn; 1950 mumps->id.jcn = mumps->jcn; 1951 if (mumps->id.ICNTL(6)>1) mumps->id.a = (MumpsScalar*)mumps->val; 1952 if (r) { 1953 mumps->id.ICNTL(7) = 1; 1954 if (!mumps->myid) { 1955 const PetscInt *idx; 1956 PetscInt i; 1957 1958 PetscCall(PetscMalloc1(M,&mumps->id.perm_in)); 1959 PetscCall(ISGetIndices(r,&idx)); 1960 for (i=0; i<M; i++) PetscCall(PetscMUMPSIntCast(idx[i]+1,&(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */ 1961 PetscCall(ISRestoreIndices(r,&idx)); 1962 } 1963 } 1964 } 1965 break; 1966 case 3: /* distributed assembled matrix input (size>1) */ 1967 mumps->id.nnz_loc = mumps->nnz; 1968 mumps->id.irn_loc = mumps->irn; 1969 mumps->id.jcn_loc = mumps->jcn; 1970 if (mumps->id.ICNTL(6)>1) mumps->id.a_loc = (MumpsScalar*)mumps->val; 1971 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1972 PetscCall(MatCreateVecs(A,NULL,&b)); 1973 PetscCall(VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq)); 1974 PetscCall(VecDestroy(&b)); 1975 } 1976 break; 1977 } 1978 PetscMUMPS_c(mumps); 1979 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps)); 1980 1981 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1982 F->ops->solve = MatSolve_MUMPS; 1983 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1984 F->ops->matsolve = MatMatSolve_MUMPS; 1985 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1986 1987 mumps->matstruc = SAME_NONZERO_PATTERN; 1988 PetscFunctionReturn(0); 1989 } 1990 1991 /* Note the Petsc r and c permutations are ignored */ 1992 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1993 { 1994 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1995 Vec b; 1996 const PetscInt M = A->rmap->N; 1997 1998 PetscFunctionBegin; 1999 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2000 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2001 PetscFunctionReturn(0); 2002 } 2003 2004 /* Set MUMPS options from the options database */ 2005 PetscCall(PetscSetMUMPSFromOptions(F,A)); 2006 2007 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2008 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps)); 2009 2010 /* analysis phase */ 2011 /*----------------*/ 2012 mumps->id.job = JOB_FACTSYMBOLIC; 2013 mumps->id.n = M; 2014 switch (mumps->id.ICNTL(18)) { 2015 case 0: /* centralized assembled matrix input */ 2016 if (!mumps->myid) { 2017 mumps->id.nnz = mumps->nnz; 2018 mumps->id.irn = mumps->irn; 2019 mumps->id.jcn = mumps->jcn; 2020 if (mumps->id.ICNTL(6)>1) { 2021 mumps->id.a = (MumpsScalar*)mumps->val; 2022 } 2023 } 2024 break; 2025 case 3: /* distributed assembled matrix input (size>1) */ 2026 mumps->id.nnz_loc = mumps->nnz; 2027 mumps->id.irn_loc = mumps->irn; 2028 mumps->id.jcn_loc = mumps->jcn; 2029 if (mumps->id.ICNTL(6)>1) { 2030 mumps->id.a_loc = (MumpsScalar*)mumps->val; 2031 } 2032 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2033 PetscCall(MatCreateVecs(A,NULL,&b)); 2034 PetscCall(VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq)); 2035 PetscCall(VecDestroy(&b)); 2036 } 2037 break; 2038 } 2039 PetscMUMPS_c(mumps); 2040 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps)); 2041 2042 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2043 F->ops->solve = MatSolve_MUMPS; 2044 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2045 2046 mumps->matstruc = SAME_NONZERO_PATTERN; 2047 PetscFunctionReturn(0); 2048 } 2049 2050 /* Note the Petsc r permutation and factor info are ignored */ 2051 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info) 2052 { 2053 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 2054 Vec b; 2055 const PetscInt M = A->rmap->N; 2056 2057 PetscFunctionBegin; 2058 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2059 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2060 PetscFunctionReturn(0); 2061 } 2062 2063 /* Set MUMPS options from the options database */ 2064 PetscCall(PetscSetMUMPSFromOptions(F,A)); 2065 2066 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2067 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps)); 2068 2069 /* analysis phase */ 2070 /*----------------*/ 2071 mumps->id.job = JOB_FACTSYMBOLIC; 2072 mumps->id.n = M; 2073 switch (mumps->id.ICNTL(18)) { 2074 case 0: /* centralized assembled matrix input */ 2075 if (!mumps->myid) { 2076 mumps->id.nnz = mumps->nnz; 2077 mumps->id.irn = mumps->irn; 2078 mumps->id.jcn = mumps->jcn; 2079 if (mumps->id.ICNTL(6)>1) { 2080 mumps->id.a = (MumpsScalar*)mumps->val; 2081 } 2082 } 2083 break; 2084 case 3: /* distributed assembled matrix input (size>1) */ 2085 mumps->id.nnz_loc = mumps->nnz; 2086 mumps->id.irn_loc = mumps->irn; 2087 mumps->id.jcn_loc = mumps->jcn; 2088 if (mumps->id.ICNTL(6)>1) { 2089 mumps->id.a_loc = (MumpsScalar*)mumps->val; 2090 } 2091 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2092 PetscCall(MatCreateVecs(A,NULL,&b)); 2093 PetscCall(VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq)); 2094 PetscCall(VecDestroy(&b)); 2095 } 2096 break; 2097 } 2098 PetscMUMPS_c(mumps); 2099 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps)); 2100 2101 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2102 F->ops->solve = MatSolve_MUMPS; 2103 F->ops->solvetranspose = MatSolve_MUMPS; 2104 F->ops->matsolve = MatMatSolve_MUMPS; 2105 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2106 #if defined(PETSC_USE_COMPLEX) 2107 F->ops->getinertia = NULL; 2108 #else 2109 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2110 #endif 2111 2112 mumps->matstruc = SAME_NONZERO_PATTERN; 2113 PetscFunctionReturn(0); 2114 } 2115 2116 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) 2117 { 2118 PetscBool iascii; 2119 PetscViewerFormat format; 2120 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 2121 2122 PetscFunctionBegin; 2123 /* check if matrix is mumps type */ 2124 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0); 2125 2126 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 2127 if (iascii) { 2128 PetscCall(PetscViewerGetFormat(viewer,&format)); 2129 if (format == PETSC_VIEWER_ASCII_INFO) { 2130 PetscCall(PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n")); 2131 PetscCall(PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d\n",mumps->id.sym)); 2132 PetscCall(PetscViewerASCIIPrintf(viewer," PAR (host participation): %d\n",mumps->id.par)); 2133 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d\n",mumps->id.ICNTL(1))); 2134 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d\n",mumps->id.ICNTL(2))); 2135 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d\n",mumps->id.ICNTL(3))); 2136 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d\n",mumps->id.ICNTL(4))); 2137 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d\n",mumps->id.ICNTL(5))); 2138 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d\n",mumps->id.ICNTL(6))); 2139 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d\n",mumps->id.ICNTL(7))); 2140 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d\n",mumps->id.ICNTL(8))); 2141 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d\n",mumps->id.ICNTL(10))); 2142 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d\n",mumps->id.ICNTL(11))); 2143 if (mumps->id.ICNTL(11)>0) { 2144 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4))); 2145 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5))); 2146 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6))); 2147 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8))); 2148 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9))); 2149 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11))); 2150 } 2151 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d\n",mumps->id.ICNTL(12))); 2152 
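/*
   Usage note (hedged): the MUMPS run-parameter report assembled in this routine is produced when the factored
   matrix is viewed with the PETSC_VIEWER_ASCII_INFO format, e.g. automatically via -ksp_view when MUMPS is used
   inside a KSP/PC, or explicitly with something like (F being an assumed name for the factored matrix)

     PetscCall(PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO));
     PetscCall(MatView(F,PETSC_VIEWER_STDOUT_WORLD));
     PetscCall(PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD));
*/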
PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(13) (sequential factorization of the root node): %d\n",mumps->id.ICNTL(13))); 2153 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d\n",mumps->id.ICNTL(14))); 2154 /* ICNTL(15-17) not used */ 2155 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d\n",mumps->id.ICNTL(18))); 2156 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d\n",mumps->id.ICNTL(19))); 2157 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(20) (RHS sparse pattern): %d\n",mumps->id.ICNTL(20))); 2158 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d\n",mumps->id.ICNTL(21))); 2159 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d\n",mumps->id.ICNTL(22))); 2160 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory can be allocated locally):%d\n",mumps->id.ICNTL(23))); 2161 2162 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d\n",mumps->id.ICNTL(24))); 2163 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d\n",mumps->id.ICNTL(25))); 2164 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for RHS or solution): %d\n",mumps->id.ICNTL(26))); 2165 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(27) (blocking size for multiple RHS): %d\n",mumps->id.ICNTL(27))); 2166 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d\n",mumps->id.ICNTL(28))); 2167 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d\n",mumps->id.ICNTL(29))); 2168 2169 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d\n",mumps->id.ICNTL(30))); 2170 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(31) (factors is discarded in the solve phase): %d\n",mumps->id.ICNTL(31))); 2171 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d\n",mumps->id.ICNTL(33))); 2172 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d\n",mumps->id.ICNTL(35))); 2173 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(36) (choice of BLR factorization variant): %d\n",mumps->id.ICNTL(36))); 2174 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(38) (estimated compression rate of LU factors): %d\n",mumps->id.ICNTL(38))); 2175 2176 PetscCall(PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1))); 2177 PetscCall(PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2))); 2178 PetscCall(PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3))); 2179 PetscCall(PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4))); 2180 PetscCall(PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5))); 2181 PetscCall(PetscViewerASCIIPrintf(viewer," CNTL(7) (dropping parameter for BLR): %g \n",mumps->id.CNTL(7))); 2182 2183 /* information local to each processor */ 2184 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n")); 2185 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 2186 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(1))); 2187 PetscCall(PetscViewerFlush(viewer)); 2188 PetscCall(PetscViewerASCIIPrintf(viewer, 
" RINFO(2) (local estimated flops for the assembly after factorization): \n")); 2189 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(2))); 2190 PetscCall(PetscViewerFlush(viewer)); 2191 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n")); 2192 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(3))); 2193 PetscCall(PetscViewerFlush(viewer)); 2194 2195 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n")); 2196 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(15))); 2197 PetscCall(PetscViewerFlush(viewer)); 2198 2199 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n")); 2200 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(16))); 2201 PetscCall(PetscViewerFlush(viewer)); 2202 2203 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n")); 2204 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(23))); 2205 PetscCall(PetscViewerFlush(viewer)); 2206 2207 if (mumps->ninfo && mumps->ninfo <= 80) { 2208 PetscInt i; 2209 for (i=0; i<mumps->ninfo; i++) { 2210 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "): \n",mumps->info[i])); 2211 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(mumps->info[i]))); 2212 PetscCall(PetscViewerFlush(viewer)); 2213 } 2214 } 2215 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 2216 2217 if (!mumps->myid) { /* information from the host */ 2218 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1))); 2219 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2))); 2220 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3))); 2221 PetscCall(PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34))); 2222 2223 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(3))); 2224 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(4))); 2225 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d\n",mumps->id.INFOG(5))); 2226 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d\n",mumps->id.INFOG(6))); 2227 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively used after analysis): %d\n",mumps->id.INFOG(7))); 2228 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n",mumps->id.INFOG(8))); 2229 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n",mumps->id.INFOG(9))); 2230 
PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d\n",mumps->id.INFOG(10))); 2231 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d\n",mumps->id.INFOG(11))); 2232 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d\n",mumps->id.INFOG(12))); 2233 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d\n",mumps->id.INFOG(13))); 2234 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d\n",mumps->id.INFOG(14))); 2235 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d\n",mumps->id.INFOG(15))); 2236 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n",mumps->id.INFOG(16))); 2237 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n",mumps->id.INFOG(17))); 2238 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n",mumps->id.INFOG(18))); 2239 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n",mumps->id.INFOG(19))); 2240 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d\n",mumps->id.INFOG(20))); 2241 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n",mumps->id.INFOG(21))); 2242 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n",mumps->id.INFOG(22))); 2243 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n",mumps->id.INFOG(23))); 2244 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n",mumps->id.INFOG(24))); 2245 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n",mumps->id.INFOG(25))); 2246 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28))); 2247 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29))); 2248 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31))); 2249 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32))); 2250 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33))); 2251 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34))); 2252 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(35) (after factorization: number of entries taking into account BLR 
factor compression - sum over all processors): %d\n",mumps->id.INFOG(35))); 2253 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(36))); 2254 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n",mumps->id.INFOG(37))); 2255 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(38))); 2256 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n",mumps->id.INFOG(39))); 2257 } 2258 } 2259 } 2260 PetscFunctionReturn(0); 2261 } 2262 2263 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info) 2264 { 2265 Mat_MUMPS *mumps =(Mat_MUMPS*)A->data; 2266 2267 PetscFunctionBegin; 2268 info->block_size = 1.0; 2269 info->nz_allocated = mumps->id.INFOG(20); 2270 info->nz_used = mumps->id.INFOG(20); 2271 info->nz_unneeded = 0.0; 2272 info->assemblies = 0.0; 2273 info->mallocs = 0.0; 2274 info->memory = 0.0; 2275 info->fill_ratio_given = 0; 2276 info->fill_ratio_needed = 0; 2277 info->factor_mallocs = 0; 2278 PetscFunctionReturn(0); 2279 } 2280 2281 /* -------------------------------------------------------------------------------------------*/ 2282 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 2283 { 2284 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2285 const PetscScalar *arr; 2286 const PetscInt *idxs; 2287 PetscInt size,i; 2288 2289 PetscFunctionBegin; 2290 PetscCall(ISGetLocalSize(is,&size)); 2291 if (mumps->petsc_size > 1) { 2292 PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */ 2293 2294 ls = mumps->myid ? (size ? 
PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 2295 PetscCallMPI(MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm)); 2296 PetscCheck(gs,PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc"); 2297 } 2298 2299 /* Schur complement matrix */ 2300 PetscCall(MatDestroy(&F->schur)); 2301 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur)); 2302 PetscCall(MatDenseGetArrayRead(F->schur,&arr)); 2303 mumps->id.schur = (MumpsScalar*)arr; 2304 mumps->id.size_schur = size; 2305 mumps->id.schur_lld = size; 2306 PetscCall(MatDenseRestoreArrayRead(F->schur,&arr)); 2307 if (mumps->sym == 1) { 2308 PetscCall(MatSetOption(F->schur,MAT_SPD,PETSC_TRUE)); 2309 } 2310 2311 /* MUMPS expects Fortran style indices */ 2312 PetscCall(PetscFree(mumps->id.listvar_schur)); 2313 PetscCall(PetscMalloc1(size,&mumps->id.listvar_schur)); 2314 PetscCall(ISGetIndices(is,&idxs)); 2315 for (i=0; i<size; i++) PetscCall(PetscMUMPSIntCast(idxs[i]+1,&(mumps->id.listvar_schur[i]))); 2316 PetscCall(ISRestoreIndices(is,&idxs)); 2317 if (mumps->petsc_size > 1) { 2318 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2319 } else { 2320 if (F->factortype == MAT_FACTOR_LU) { 2321 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2322 } else { 2323 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2324 } 2325 } 2326 /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 2327 mumps->id.ICNTL(26) = -1; 2328 PetscFunctionReturn(0); 2329 } 2330 2331 /* -------------------------------------------------------------------------------------------*/ 2332 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S) 2333 { 2334 Mat St; 2335 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2336 PetscScalar *array; 2337 #if defined(PETSC_USE_COMPLEX) 2338 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 2339 #endif 2340 2341 PetscFunctionBegin; 2342 PetscCheck(mumps->id.ICNTL(19),PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! 
You should call MatFactorSetSchurIS to enable it"); 2343 PetscCall(MatCreate(PETSC_COMM_SELF,&St)); 2344 PetscCall(MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur)); 2345 PetscCall(MatSetType(St,MATDENSE)); 2346 PetscCall(MatSetUp(St)); 2347 PetscCall(MatDenseGetArray(St,&array)); 2348 if (!mumps->sym) { /* MUMPS always return a full matrix */ 2349 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2350 PetscInt i,j,N=mumps->id.size_schur; 2351 for (i=0;i<N;i++) { 2352 for (j=0;j<N;j++) { 2353 #if !defined(PETSC_USE_COMPLEX) 2354 PetscScalar val = mumps->id.schur[i*N+j]; 2355 #else 2356 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2357 #endif 2358 array[j*N+i] = val; 2359 } 2360 } 2361 } else { /* stored by columns */ 2362 PetscCall(PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur)); 2363 } 2364 } else { /* either full or lower-triangular (not packed) */ 2365 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2366 PetscInt i,j,N=mumps->id.size_schur; 2367 for (i=0;i<N;i++) { 2368 for (j=i;j<N;j++) { 2369 #if !defined(PETSC_USE_COMPLEX) 2370 PetscScalar val = mumps->id.schur[i*N+j]; 2371 #else 2372 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2373 #endif 2374 array[i*N+j] = val; 2375 array[j*N+i] = val; 2376 } 2377 } 2378 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2379 PetscCall(PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur)); 2380 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2381 PetscInt i,j,N=mumps->id.size_schur; 2382 for (i=0;i<N;i++) { 2383 for (j=0;j<i+1;j++) { 2384 #if !defined(PETSC_USE_COMPLEX) 2385 PetscScalar val = mumps->id.schur[i*N+j]; 2386 #else 2387 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2388 #endif 2389 array[i*N+j] = val; 2390 array[j*N+i] = val; 2391 } 2392 } 2393 } 2394 } 2395 PetscCall(MatDenseRestoreArray(St,&array)); 2396 *S = St; 2397 PetscFunctionReturn(0); 2398 } 2399 2400 /* -------------------------------------------------------------------------------------------*/ 2401 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival) 2402 { 2403 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2404 2405 PetscFunctionBegin; 2406 PetscCall(PetscMUMPSIntCast(ival,&mumps->id.ICNTL(icntl))); 2407 PetscFunctionReturn(0); 2408 } 2409 2410 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival) 2411 { 2412 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2413 2414 PetscFunctionBegin; 2415 *ival = mumps->id.ICNTL(icntl); 2416 PetscFunctionReturn(0); 2417 } 2418 2419 /*@ 2420 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2421 2422 Logically Collective on Mat 2423 2424 Input Parameters: 2425 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2426 . icntl - index of MUMPS parameter array ICNTL() 2427 - ival - value of MUMPS ICNTL(icntl) 2428 2429 Options Database: 2430 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival 2431 2432 Level: beginner 2433 2434 References: 2435 . 
* - MUMPS Users' Guide 2436 2437 .seealso: MatGetFactor(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2438 @*/ 2439 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival) 2440 { 2441 PetscFunctionBegin; 2442 PetscValidType(F,1); 2443 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2444 PetscValidLogicalCollectiveInt(F,icntl,2); 2445 PetscValidLogicalCollectiveInt(F,ival,3); 2446 PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival)); 2447 PetscFunctionReturn(0); 2448 } 2449 2450 /*@ 2451 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2452 2453 Logically Collective on Mat 2454 2455 Input Parameters: 2456 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2457 - icntl - index of MUMPS parameter array ICNTL() 2458 2459 Output Parameter: 2460 . ival - value of MUMPS ICNTL(icntl) 2461 2462 Level: beginner 2463 2464 References: 2465 . * - MUMPS Users' Guide 2466 2467 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2468 @*/ 2469 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival) 2470 { 2471 PetscFunctionBegin; 2472 PetscValidType(F,1); 2473 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2474 PetscValidLogicalCollectiveInt(F,icntl,2); 2475 PetscValidIntPointer(ival,3); 2476 PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival)); 2477 PetscFunctionReturn(0); 2478 } 2479 2480 /* -------------------------------------------------------------------------------------------*/ 2481 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val) 2482 { 2483 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2484 2485 PetscFunctionBegin; 2486 mumps->id.CNTL(icntl) = val; 2487 PetscFunctionReturn(0); 2488 } 2489 2490 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val) 2491 { 2492 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2493 2494 PetscFunctionBegin; 2495 *val = mumps->id.CNTL(icntl); 2496 PetscFunctionReturn(0); 2497 } 2498 2499 /*@ 2500 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2501 2502 Logically Collective on Mat 2503 2504 Input Parameters: 2505 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2506 . icntl - index of MUMPS parameter array CNTL() 2507 - val - value of MUMPS CNTL(icntl) 2508 2509 Options Database: 2510 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival 2511 2512 Level: beginner 2513 2514 References: 2515 . 
* - MUMPS Users' Guide 2516 2517 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2518 @*/ 2519 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val) 2520 { 2521 PetscFunctionBegin; 2522 PetscValidType(F,1); 2523 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2524 PetscValidLogicalCollectiveInt(F,icntl,2); 2525 PetscValidLogicalCollectiveReal(F,val,3); 2526 PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val)); 2527 PetscFunctionReturn(0); 2528 } 2529 2530 /*@ 2531 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2532 2533 Logically Collective on Mat 2534 2535 Input Parameters: 2536 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2537 - icntl - index of MUMPS parameter array CNTL() 2538 2539 Output Parameter: 2540 . val - value of MUMPS CNTL(icntl) 2541 2542 Level: beginner 2543 2544 References: 2545 . * - MUMPS Users' Guide 2546 2547 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2548 @*/ 2549 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val) 2550 { 2551 PetscFunctionBegin; 2552 PetscValidType(F,1); 2553 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2554 PetscValidLogicalCollectiveInt(F,icntl,2); 2555 PetscValidRealPointer(val,3); 2556 PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val)); 2557 PetscFunctionReturn(0); 2558 } 2559 2560 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info) 2561 { 2562 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2563 2564 PetscFunctionBegin; 2565 *info = mumps->id.INFO(icntl); 2566 PetscFunctionReturn(0); 2567 } 2568 2569 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog) 2570 { 2571 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2572 2573 PetscFunctionBegin; 2574 *infog = mumps->id.INFOG(icntl); 2575 PetscFunctionReturn(0); 2576 } 2577 2578 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo) 2579 { 2580 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2581 2582 PetscFunctionBegin; 2583 *rinfo = mumps->id.RINFO(icntl); 2584 PetscFunctionReturn(0); 2585 } 2586 2587 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog) 2588 { 2589 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2590 2591 PetscFunctionBegin; 2592 *rinfog = mumps->id.RINFOG(icntl); 2593 PetscFunctionReturn(0); 2594 } 2595 2596 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS) 2597 { 2598 Mat Bt = NULL,Btseq = NULL; 2599 PetscBool flg; 2600 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2601 PetscScalar *aa; 2602 PetscInt spnr,*ia,*ja,M,nrhs; 2603 2604 PetscFunctionBegin; 2605 PetscValidPointer(spRHS,2); 2606 PetscCall(PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg)); 2607 if (flg) { 2608 PetscCall(MatTransposeGetMat(spRHS,&Bt)); 2609 } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be type MATTRANSPOSEMAT matrix"); 2610 2611 PetscCall(MatMumpsSetIcntl(F,30,1)); 2612 2613 if (mumps->petsc_size > 1) { 2614 Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data; 2615 Btseq = b->A; 2616 } else { 2617 Btseq = Bt; 2618 } 2619 2620 PetscCall(MatGetSize(spRHS,&M,&nrhs)); 2621 mumps->id.nrhs = nrhs; 2622 mumps->id.lrhs = M; 2623 
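/*
   Hedged usage sketch for the functionality implemented here (uniprocessor case; the names n and F and the
   diagonal pattern are illustrative assumptions): to request the diagonal of inv(A) from an existing MUMPS
   factor F of an n-by-n matrix, one could build a sparse selection matrix and call the user-level API defined
   further down in this file:

     Mat      Bt,spRHS;
     PetscInt i;
     PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,n,n,1,NULL,&Bt));
     for (i = 0; i < n; i++) PetscCall(MatSetValue(Bt,i,i,1.0,INSERT_VALUES)); // pattern selects the wanted entries
     PetscCall(MatAssemblyBegin(Bt,MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(Bt,MAT_FINAL_ASSEMBLY));
     PetscCall(MatCreateTranspose(Bt,&spRHS));      // MatMumpsGetInverse() expects a MATTRANSPOSEMAT wrapper
     PetscCall(MatMumpsGetInverse(F,spRHS));        // on return Bt holds the requested entries of inv(A)
     PetscCall(MatDestroy(&spRHS));
     PetscCall(MatDestroy(&Bt));

   In parallel, Bt must instead be a MATMPIAIJ whose selected entries all reside on rank 0; see the man pages of
   MatMumpsGetInverse() and MatMumpsGetInverseTranspose() below.
*/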
mumps->id.rhs = NULL; 2624 2625 if (!mumps->myid) { 2626 PetscCall(MatSeqAIJGetArray(Btseq,&aa)); 2627 PetscCall(MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg)); 2628 PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2629 PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs)); 2630 mumps->id.rhs_sparse = (MumpsScalar*)aa; 2631 } else { 2632 mumps->id.irhs_ptr = NULL; 2633 mumps->id.irhs_sparse = NULL; 2634 mumps->id.nz_rhs = 0; 2635 mumps->id.rhs_sparse = NULL; 2636 } 2637 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2638 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2639 2640 /* solve phase */ 2641 /*-------------*/ 2642 mumps->id.job = JOB_SOLVE; 2643 PetscMUMPS_c(mumps); 2644 if (mumps->id.INFOG(1) < 0) 2645 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d",mumps->id.INFOG(1),mumps->id.INFO(2)); 2646 2647 if (!mumps->myid) { 2648 PetscCall(MatSeqAIJRestoreArray(Btseq,&aa)); 2649 PetscCall(MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg)); 2650 PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2651 } 2652 PetscFunctionReturn(0); 2653 } 2654 2655 /*@ 2656 MatMumpsGetInverse - Get user-specified set of entries in inverse of A 2657 2658 Logically Collective on Mat 2659 2660 Input Parameters: 2661 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2662 - spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0] 2663 2664 Output Parameter: 2665 . spRHS - requested entries of inverse of A 2666 2667 Level: beginner 2668 2669 References: 2670 . * - MUMPS Users' Guide 2671 2672 .seealso: MatGetFactor(), MatCreateTranspose() 2673 @*/ 2674 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS) 2675 { 2676 PetscFunctionBegin; 2677 PetscValidType(F,1); 2678 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2679 PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS)); 2680 PetscFunctionReturn(0); 2681 } 2682 2683 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST) 2684 { 2685 Mat spRHS; 2686 2687 PetscFunctionBegin; 2688 PetscCall(MatCreateTranspose(spRHST,&spRHS)); 2689 PetscCall(MatMumpsGetInverse_MUMPS(F,spRHS)); 2690 PetscCall(MatDestroy(&spRHS)); 2691 PetscFunctionReturn(0); 2692 } 2693 2694 /*@ 2695 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T 2696 2697 Logically Collective on Mat 2698 2699 Input Parameters: 2700 + F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface 2701 - spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0] 2702 2703 Output Parameter: 2704 . spRHST - requested entries of inverse of A^T 2705 2706 Level: beginner 2707 2708 References: 2709 . 
* - MUMPS Users' Guide 2710 2711 .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse() 2712 @*/ 2713 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST) 2714 { 2715 PetscBool flg; 2716 2717 PetscFunctionBegin; 2718 PetscValidType(F,1); 2719 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2720 PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL)); 2721 PetscCheck(flg,PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix"); 2722 2723 PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST)); 2724 PetscFunctionReturn(0); 2725 } 2726 2727 /*@ 2728 MatMumpsGetInfo - Get MUMPS parameter INFO() 2729 2730 Logically Collective on Mat 2731 2732 Input Parameters: 2733 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2734 - icntl - index of MUMPS parameter array INFO() 2735 2736 Output Parameter: 2737 . ival - value of MUMPS INFO(icntl) 2738 2739 Level: beginner 2740 2741 References: 2742 . * - MUMPS Users' Guide 2743 2744 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2745 @*/ 2746 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival) 2747 { 2748 PetscFunctionBegin; 2749 PetscValidType(F,1); 2750 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2751 PetscValidIntPointer(ival,3); 2752 PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival)); 2753 PetscFunctionReturn(0); 2754 } 2755 2756 /*@ 2757 MatMumpsGetInfog - Get MUMPS parameter INFOG() 2758 2759 Logically Collective on Mat 2760 2761 Input Parameters: 2762 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2763 - icntl - index of MUMPS parameter array INFOG() 2764 2765 Output Parameter: 2766 . ival - value of MUMPS INFOG(icntl) 2767 2768 Level: beginner 2769 2770 References: 2771 . * - MUMPS Users' Guide 2772 2773 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2774 @*/ 2775 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival) 2776 { 2777 PetscFunctionBegin; 2778 PetscValidType(F,1); 2779 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2780 PetscValidIntPointer(ival,3); 2781 PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival)); 2782 PetscFunctionReturn(0); 2783 } 2784 2785 /*@ 2786 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 2787 2788 Logically Collective on Mat 2789 2790 Input Parameters: 2791 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2792 - icntl - index of MUMPS parameter array RINFO() 2793 2794 Output Parameter: 2795 . val - value of MUMPS RINFO(icntl) 2796 2797 Level: beginner 2798 2799 References: 2800 . 
* - MUMPS Users' Guide 2801 2802 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfog() 2803 @*/ 2804 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val) 2805 { 2806 PetscFunctionBegin; 2807 PetscValidType(F,1); 2808 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2809 PetscValidRealPointer(val,3); 2810 PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val)); 2811 PetscFunctionReturn(0); 2812 } 2813 2814 /*@ 2815 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 2816 2817 Logically Collective on Mat 2818 2819 Input Parameters: 2820 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2821 - icntl - index of MUMPS parameter array RINFOG() 2822 2823 Output Parameter: 2824 . val - value of MUMPS RINFOG(icntl) 2825 2826 Level: beginner 2827 2828 References: 2829 . * - MUMPS Users' Guide 2830 2831 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo() 2832 @*/ 2833 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val) 2834 { 2835 PetscFunctionBegin; 2836 PetscValidType(F,1); 2837 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2838 PetscValidRealPointer(val,3); 2839 PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val)); 2840 PetscFunctionReturn(0); 2841 } 2842 2843 /*MC 2844 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 2845 distributed and sequential matrices via the external package MUMPS. 2846 2847 Works with MATAIJ and MATSBAIJ matrices 2848 2849 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 2850 2851 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below. 2852 2853 Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver 2854 2855 Options Database Keys: 2856 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 2857 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 2858 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 2859 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 2860 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 2861 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 2862 Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 2863 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 2864 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 2865 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 2866 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 2867 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 2868 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 2869 . 
-mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 2870 . -mat_mumps_icntl_20 - ICNTL(20): gives MUMPS centralized (0) or distributed (10) dense RHS 2871 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 2872 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor 2873 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 2874 . -mat_mumps_icntl_25 - ICNTL(25): computes a solution of a deficient matrix and a null space basis 2875 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed 2876 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering 2877 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 2878 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 2879 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 2880 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 2881 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 2882 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 2883 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 2884 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 2885 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 2886 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 2887 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 2888 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 2889 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 2890 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) were called before calling MUMPS. 2891 The default is typically the number of cores per CPU package (socket) as reported by hwloc, as suggested by the MUMPS manual. 2892 2893 Level: beginner 2894 2895 Notes: 2896 MUMPS Cholesky does not handle (complex) Hermitian matrices (see the MUMPS Users' Guide, http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf), so using it on a Hermitian matrix generates an error. 2897 2898 When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling 2899 $ KSPGetPC(ksp,&pc); 2900 $ PCFactorGetMatrix(pc,&mat); 2901 $ MatMumpsGetInfo(mat,....); 2902 $ MatMumpsGetInfog(mat,....); etc. 2903 Alternatively, you can run with -ksp_error_if_not_converged; the program then stops with the MUMPS information printed in the error message. 2904 2905 Using MUMPS with 64-bit integers 2906 MUMPS provides 64-bit integer support in two build modes: 2907 full 64-bit: here MUMPS is built with the C preprocessor flag -DINTSIZE64 and the Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and 2908 requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS to be built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI). 2909 2910 selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices, 2911 MUMPS stores column indices as 32-bit integers but row offsets as 64-bit integers, so you can have a huge number of non-zeros, but must have fewer than 2^31 rows and 2912 columns.
This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit 2913 integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS. 2914 2915 With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc. 2916 2917 There are two modes to run MUMPS/PETSc with OpenMP: 2918 $ Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 2919 $ threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". 2920 2921 $ Use -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example, 2922 $ if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 2923 2924 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 2925 (i.e., the PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc 2926 (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 2927 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 2928 (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided). 2929 2930 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 2931 processes on each compute node. Listing the processes in rank-ascending order, we split the processes on a node into consecutive groups of 2932 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and the others in the omp_comm 2933 are called slave ranks (or slaves). Only the master ranks are visible to MUMPS; the slaves are not. We free the CPUs assigned to the slaves (their assignment might be set 2934 by CPU binding policies in job scripts) and make them available to the master so that the OpenMP threads spawned by MUMPS can run on those CPUs. 2935 On a multi-socket compute node, MPI rank mapping matters. Continuing the example above, suppose your compute node has two sockets; 2936 if you interleave MPI ranks across the two sockets, in other words, even ranks are placed on socket 0 and odd ranks on socket 1, and bind 2937 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16 a master rank (and the threads it spawns) will use half of the cores on socket 0 and half 2938 of the cores on socket 1, which hurts locality. On the other hand, if you map MPI ranks consecutively onto the two sockets, the 2939 problem does not arise. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 2940 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 2941 examine the mapping result. 2942 2943 PETSc does not control thread binding in MUMPS, so to get the best performance one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts, 2944 for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts, as PETSc 2945 calls omp_set_num_threads(m) internally before calling MUMPS.
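   As a minimal sketch (the ksp object is assumed to exist, and ICNTL(14)=30 is just an illustrative value), MUMPS can also be selected and tuned from code rather than the options database:
$      KSPGetPC(ksp,&pc);
$      PCSetType(pc,PCLU);
$      PCFactorSetMatSolverType(pc,MATSOLVERMUMPS);
$      PCFactorSetUpMatSolverType(pc);
$      PCFactorGetMatrix(pc,&F);
$      MatMumpsSetIcntl(F,14,30);
   Here PCFactorSetUpMatSolverType() creates the factor matrix so that MUMPS options can be set before the factorization takes place.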
2946 2947 References: 2948 + * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 2949 - * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017. 2950 2951 .seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCFactorGetMatrix() 2952 2953 M*/ 2954 2955 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type) 2956 { 2957 PetscFunctionBegin; 2958 *type = MATSOLVERMUMPS; 2959 PetscFunctionReturn(0); 2960 } 2961 2962 /* MatGetFactor for Seq and MPI AIJ matrices */ 2963 static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F) 2964 { 2965 Mat B; 2966 Mat_MUMPS *mumps; 2967 PetscBool isSeqAIJ; 2968 PetscMPIInt size; 2969 2970 PetscFunctionBegin; 2971 #if defined(PETSC_USE_COMPLEX) 2972 PetscCheckFalse(A->hermitian && !A->symmetric && ftype == MAT_FACTOR_CHOLESKY,PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported"); 2973 #endif 2974 /* Create the factorization matrix */ 2975 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ)); 2976 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 2977 PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 2978 PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name)); 2979 PetscCall(MatSetUp(B)); 2980 2981 PetscCall(PetscNewLog(B,&mumps)); 2982 2983 B->ops->view = MatView_MUMPS; 2984 B->ops->getinfo = MatGetInfo_MUMPS; 2985 2986 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps)); 2987 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS)); 2988 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS)); 2989 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS)); 2990 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS)); 2991 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS)); 2992 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS)); 2993 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS)); 2994 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS)); 2995 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS)); 2996 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS)); 2997 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS)); 2998 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS)); 2999 3000 if (ftype == MAT_FACTOR_LU) { 3001 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3002 B->factortype = MAT_FACTOR_LU; 3003 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3004 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 3005 
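  /* MUMPS takes the matrix in assembled (i,j,v) triplet form; the ConvertToTriples routine chosen above matches the
     input storage format and is invoked when the triplets are built for the factorization. */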
PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU])); 3006 mumps->sym = 0; 3007 } else { 3008 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3009 B->factortype = MAT_FACTOR_CHOLESKY; 3010 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3011 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 3012 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3013 #if defined(PETSC_USE_COMPLEX) 3014 mumps->sym = 2; 3015 #else 3016 if (A->spd_set && A->spd) mumps->sym = 1; 3017 else mumps->sym = 2; 3018 #endif 3019 } 3020 3021 /* set solvertype */ 3022 PetscCall(PetscFree(B->solvertype)); 3023 PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype)); 3024 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 3025 if (size == 1) { 3026 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3027 B->canuseordering = PETSC_TRUE; 3028 } 3029 B->ops->destroy = MatDestroy_MUMPS; 3030 B->data = (void*)mumps; 3031 3032 PetscCall(PetscInitializeMUMPS(A,mumps)); 3033 3034 *F = B; 3035 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3036 PetscFunctionReturn(0); 3037 } 3038 3039 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3040 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F) 3041 { 3042 Mat B; 3043 Mat_MUMPS *mumps; 3044 PetscBool isSeqSBAIJ; 3045 PetscMPIInt size; 3046 3047 PetscFunctionBegin; 3048 #if defined(PETSC_USE_COMPLEX) 3049 PetscCheck(!A->hermitian || A->symmetric,PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported"); 3050 #endif 3051 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 3052 PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 3053 PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name)); 3054 PetscCall(MatSetUp(B)); 3055 3056 PetscCall(PetscNewLog(B,&mumps)); 3057 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ)); 3058 if (isSeqSBAIJ) { 3059 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3060 } else { 3061 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3062 } 3063 3064 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3065 B->ops->view = MatView_MUMPS; 3066 B->ops->getinfo = MatGetInfo_MUMPS; 3067 3068 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps)); 3069 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS)); 3070 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS)); 3071 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS)); 3072 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS)); 3073 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS)); 3074 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS)); 3075 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS)); 3076 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS)); 3077 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS)); 3078 
PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS)); 3079 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS)); 3080 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS)); 3081 3082 B->factortype = MAT_FACTOR_CHOLESKY; 3083 #if defined(PETSC_USE_COMPLEX) 3084 mumps->sym = 2; 3085 #else 3086 if (A->spd_set && A->spd) mumps->sym = 1; 3087 else mumps->sym = 2; 3088 #endif 3089 3090 /* set solvertype */ 3091 PetscCall(PetscFree(B->solvertype)); 3092 PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype)); 3093 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 3094 if (size == 1) { 3095 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3096 B->canuseordering = PETSC_TRUE; 3097 } 3098 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3099 B->ops->destroy = MatDestroy_MUMPS; 3100 B->data = (void*)mumps; 3101 3102 PetscCall(PetscInitializeMUMPS(A,mumps)); 3103 3104 *F = B; 3105 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3106 PetscFunctionReturn(0); 3107 } 3108 3109 static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F) 3110 { 3111 Mat B; 3112 Mat_MUMPS *mumps; 3113 PetscBool isSeqBAIJ; 3114 PetscMPIInt size; 3115 3116 PetscFunctionBegin; 3117 /* Create the factorization matrix */ 3118 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ)); 3119 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 3120 PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 3121 PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name)); 3122 PetscCall(MatSetUp(B)); 3123 3124 PetscCall(PetscNewLog(B,&mumps)); 3125 if (ftype == MAT_FACTOR_LU) { 3126 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3127 B->factortype = MAT_FACTOR_LU; 3128 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3129 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3130 mumps->sym = 0; 3131 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU])); 3132 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 3133 3134 B->ops->view = MatView_MUMPS; 3135 B->ops->getinfo = MatGetInfo_MUMPS; 3136 3137 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps)); 3138 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS)); 3139 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS)); 3140 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS)); 3141 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS)); 3142 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS)); 3143 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS)); 3144 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS)); 3145 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS)); 3146 
PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS)); 3147 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS)); 3148 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS)); 3149 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS)); 3150 3151 /* set solvertype */ 3152 PetscCall(PetscFree(B->solvertype)); 3153 PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype)); 3154 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 3155 if (size == 1) { 3156 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3157 B->canuseordering = PETSC_TRUE; 3158 } 3159 B->ops->destroy = MatDestroy_MUMPS; 3160 B->data = (void*)mumps; 3161 3162 PetscCall(PetscInitializeMUMPS(A,mumps)); 3163 3164 *F = B; 3165 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3166 PetscFunctionReturn(0); 3167 } 3168 3169 /* MatGetFactor for Seq and MPI SELL matrices */ 3170 static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F) 3171 { 3172 Mat B; 3173 Mat_MUMPS *mumps; 3174 PetscBool isSeqSELL; 3175 PetscMPIInt size; 3176 3177 PetscFunctionBegin; 3178 /* Create the factorization matrix */ 3179 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL)); 3180 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 3181 PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 3182 PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name)); 3183 PetscCall(MatSetUp(B)); 3184 3185 PetscCall(PetscNewLog(B,&mumps)); 3186 3187 B->ops->view = MatView_MUMPS; 3188 B->ops->getinfo = MatGetInfo_MUMPS; 3189 3190 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps)); 3191 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS)); 3192 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS)); 3193 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS)); 3194 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS)); 3195 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS)); 3196 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS)); 3197 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS)); 3198 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS)); 3199 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS)); 3200 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS)); 3201 3202 if (ftype == MAT_FACTOR_LU) { 3203 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3204 B->factortype = MAT_FACTOR_LU; 3205 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 3206 else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 3207 mumps->sym = 0; 3208 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU])); 3209 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 3210 3211 /* set solvertype */ 3212 
PetscCall(PetscFree(B->solvertype)); 3213 PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype)); 3214 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 3215 if (size == 1) { 3216 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3217 B->canuseordering = PETSC_TRUE; 3218 } 3219 B->ops->destroy = MatDestroy_MUMPS; 3220 B->data = (void*)mumps; 3221 3222 PetscCall(PetscInitializeMUMPS(A,mumps)); 3223 3224 *F = B; 3225 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3226 PetscFunctionReturn(0); 3227 } 3228 3229 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 3230 { 3231 PetscFunctionBegin; 3232 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps)); 3233 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps)); 3234 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps)); 3235 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps)); 3236 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps)); 3237 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps)); 3238 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps)); 3239 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps)); 3240 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps)); 3241 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps)); 3242 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps)); 3243 PetscFunctionReturn(0); 3244 } 3245
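/*
   Example usage, a sketch only (not compiled as part of this file): obtaining selected entries of inv(A) with
   MatMumpsGetInverse(). The matrix A (MATAIJ) and the sequential matrix spRHST, whose explicit nonzeros on rank 0
   mark the requested entries of inv(A) stored as inv(A)^T, are assumed to be created by the caller; see the
   MatMumpsGetInverse() manual page above for the exact requirements on spRHS.

     Mat           F,spRHS;
     MatFactorInfo info;

     MatFactorInfoInitialize(&info);
     MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);
     MatLUFactorSymbolic(F,A,NULL,NULL,&info);
     MatLUFactorNumeric(F,A,&info);
     MatCreateTranspose(spRHST,&spRHS);
     MatMumpsGetInverse(F,spRHS);
     MatDestroy(&spRHS);
     MatDestroy(&F);

   MatCreateTranspose() wraps spRHST as the MATTRANSPOSEMAT matrix that MatMumpsGetInverse() expects; the requested
   entries are returned in place of the nonzeros of spRHST.
*/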