/*
    Provides an interface to the MUMPS sparse solver
*/
#include <petscpkg_version.h>
#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I  "petscmat.h"  I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sell/mpi/mpisell.h>

EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#include <cmumps_c.h>
#else
#include <zmumps_c.h>
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#include <smumps_c.h>
#else
#include <dmumps_c.h>
#endif
#endif
EXTERN_C_END
#define JOB_INIT         -1
#define JOB_FACTSYMBOLIC 1
#define JOB_FACTNUMERIC  2
#define JOB_SOLVE        3
#define JOB_END          -2

/* calls to MUMPS */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c cmumps_c
#else
#define MUMPS_c zmumps_c
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c smumps_c
#else
#define MUMPS_c dmumps_c
#endif
#endif

/* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
   numbers of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
   naming convention in PetscMPIInt, PetscBLASInt etc.
*/
typedef MUMPS_INT PetscMUMPSInt;

#if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
#if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
#error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
#endif
#else
#if defined(INTSIZE64) /* INTSIZE64 is a command-line macro used when building MUMPS in full 64-bit mode */
#error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
#endif
#endif

#define MPIU_MUMPSINT       MPI_INT
#define PETSC_MUMPS_INT_MAX 2147483647
#define PETSC_MUMPS_INT_MIN -2147483648

/* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers */
static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a,PetscMUMPSInt *b)
{
  PetscFunctionBegin;
#if PetscDefined(USE_64BIT_INDICES)
  PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
#endif
  *b = (PetscMUMPSInt)(a);
  PetscFunctionReturn(0);
}

/* Put these utility routines here since they are only used in this file */
static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject,const char opt[],const char text[],const char man[],PetscMUMPSInt currentvalue,PetscMUMPSInt *value,PetscBool *set,PetscMUMPSInt lb,PetscMUMPSInt ub)
{
  PetscInt  myval;
  PetscBool myset;
  PetscFunctionBegin;
  /* PetscInt's size should always be >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
  PetscCall(PetscOptionsInt_Private(PetscOptionsObject,opt,text,man,(PetscInt)currentvalue,&myval,&myset,lb,ub));
  if (myset) PetscCall(PetscMUMPSIntCast(myval,value));
  if (set) *set = myset;
  PetscFunctionReturn(0);
}
#define PetscOptionsMUMPSInt(a,b,c,d,e,f) PetscOptionsMUMPSInt_Private(PetscOptionsObject,a,b,c,d,e,f,PETSC_MUMPS_INT_MIN,PETSC_MUMPS_INT_MAX)
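/* Illustrative usage sketch (not part of the original source): an options handler would typically call
   the macro above from inside a PetscOptionsBegin/End block, e.g.

     PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing","None",mumps->id.ICNTL(4),&icntl,&flg));

   The option is parsed as a PetscInt and then range-checked by PetscMUMPSIntCast() before being stored
   in the PetscMUMPSInt output, so out-of-range values fail cleanly instead of silently overflowing. */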
/* if using PETSc OpenMP support, we only call MUMPS on master ranks.
   Before/after the call, we change/restore CPUs the master ranks can run on */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
#define PetscMUMPS_c(mumps) \
  do { \
    if (mumps->use_petsc_omp_support) { \
      if (mumps->is_omp_master) { \
        PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
        MUMPS_c(&mumps->id); \
        PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
      } \
      PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
      /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \
         to processes, so we only Bcast info[1], an error code, and leave others (since they do not have \
         an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \
         omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \
      */ \
      PetscCallMPI(MPI_Bcast(mumps->id.infog, 40,MPIU_MUMPSINT, 0,mumps->omp_comm));\
      PetscCallMPI(MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,     0,mumps->omp_comm));\
      PetscCallMPI(MPI_Bcast(mumps->id.info,  1, MPIU_MUMPSINT, 0,mumps->omp_comm));\
    } else { \
      MUMPS_c(&mumps->id); \
    } \
  } while (0)
#else
#define PetscMUMPS_c(mumps) \
  do { MUMPS_c(&mumps->id); } while (0)
#endif
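/* Added note (summary of the usage pattern elsewhere in this file, not from the original source):
   every MUMPS phase is driven the same way -- set mumps->id.job to one of JOB_INIT, JOB_FACTSYMBOLIC,
   JOB_FACTNUMERIC, JOB_SOLVE or JOB_END, call PetscMUMPS_c(mumps), and then inspect mumps->id.INFOG(1)
   (negative values indicate an error), as done in, e.g., MatSolve_MUMPS() and MatFactorNumeric_MUMPS() below. */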
/* declare MumpsScalar */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MumpsScalar mumps_complex
#else
#define MumpsScalar mumps_double_complex
#endif
#else
#define MumpsScalar PetscScalar
#endif

/* macros s.t. indices match MUMPS documentation */
#define ICNTL(I)  icntl[(I)-1]
#define CNTL(I)   cntl[(I)-1]
#define INFOG(I)  infog[(I)-1]
#define INFO(I)   info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I)  rinfo[(I)-1]

typedef struct Mat_MUMPS Mat_MUMPS;
struct Mat_MUMPS {
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;
#else
  ZMUMPS_STRUC_C id;
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
#else
  DMUMPS_STRUC_C id;
#endif
#endif

  MatStructure  matstruc;
  PetscMPIInt   myid,petsc_size;
  PetscMUMPSInt *irn,*jcn;         /* the (i,j,v) triplets passed to mumps */
  PetscScalar   *val,*val_alloc;   /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer, hence val_alloc. */
  PetscInt64    nnz;               /* number of nonzeros. The type is called selective 64-bit in mumps */
  PetscMUMPSInt sym;
  MPI_Comm      mumps_comm;
  PetscMUMPSInt ICNTL9_pre;        /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter    scat_rhs,scat_sol; /* used by MatSolve() */
  PetscMUMPSInt ICNTL20;           /* use centralized (0) or distributed (10) dense RHS */
  PetscMUMPSInt lrhs_loc,nloc_rhs,*irhs_loc;
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  PetscInt      *rhs_nrow,max_nrhs;
  PetscMPIInt   *rhs_recvcounts,*rhs_disps;
  PetscScalar   *rhs_loc,*rhs_recvbuf;
#endif
  Vec           b_seq,x_seq;
  PetscInt      ninfo,*info;       /* which INFO to display */
  PetscInt      sizeredrhs;
  PetscScalar   *schur_sol;
  PetscInt      schur_sizesol;
  PetscMUMPSInt *ia_alloc,*ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
  PetscInt64    cur_ilen,cur_jlen;   /* current lengths of ia_alloc[], ja_alloc[] */
  PetscErrorCode (*ConvertToTriples)(Mat,PetscInt,MatReuse,Mat_MUMPS*);

  /* stuff used by petsc/mumps OpenMP support */
  PetscBool    use_petsc_omp_support;
  PetscOmpCtrl omp_ctrl;            /* an OpenMP controller with which blocked processes release their CPUs (MPI_Barrier does not have this guarantee) */
  MPI_Comm     petsc_comm,omp_comm; /* petsc_comm is the petsc matrix's comm */
  PetscInt64   *recvcount;          /* a collection of nnz on omp_master */
  PetscMPIInt  tag,omp_comm_size;
  PetscBool    is_omp_master;       /* is this rank the master of omp_comm? */
  MPI_Request  *reqs;
};

/* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
   Here, nrow is the number of rows, ia[] is the row pointer and ja[] is the column indices.
 */
static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps,PetscInt nrow,PetscInt *ia,PetscInt *ja,PetscMUMPSInt **ia_mumps,PetscMUMPSInt **ja_mumps,PetscMUMPSInt *nnz_mumps)
{
  PetscInt nnz=ia[nrow]-1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */

  PetscFunctionBegin;
#if defined(PETSC_USE_64BIT_INDICES)
  {
    PetscInt i;
    if (nrow+1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
      PetscCall(PetscFree(mumps->ia_alloc));
      PetscCall(PetscMalloc1(nrow+1,&mumps->ia_alloc));
      mumps->cur_ilen = nrow+1;
    }
    if (nnz > mumps->cur_jlen) {
      PetscCall(PetscFree(mumps->ja_alloc));
      PetscCall(PetscMalloc1(nnz,&mumps->ja_alloc));
      mumps->cur_jlen = nnz;
    }
    for (i=0; i<nrow+1; i++) PetscCall(PetscMUMPSIntCast(ia[i],&(mumps->ia_alloc[i])));
    for (i=0; i<nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i],&(mumps->ja_alloc[i])));
    *ia_mumps = mumps->ia_alloc;
    *ja_mumps = mumps->ja_alloc;
  }
#else
  *ia_mumps = ia;
  *ja_mumps = ja;
#endif
  PetscCall(PetscMUMPSIntCast(nnz,nnz_mumps));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps)
{
  PetscFunctionBegin;
  PetscCall(PetscFree(mumps->id.listvar_schur));
  PetscCall(PetscFree(mumps->id.redrhs));
  PetscCall(PetscFree(mumps->schur_sol));
  mumps->id.size_schur = 0;
  mumps->id.schur_lld  = 0;
  mumps->id.ICNTL(19)  = 0;
  PetscFunctionReturn(0);
}

/* solve with rhs in mumps->id.redrhs and return in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS            *mumps=(Mat_MUMPS*)F->data;
  Mat                  S,B,X;
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;

  PetscFunctionBegin;
  PetscCall(MatFactorFactorizeSchurComplement(F));
  PetscCall(MatFactorGetSchurComplement(F,&S,&schurstatus));
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B));
  PetscCall(MatSetType(B,((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  PetscCall(MatBindToCPU(B,S->boundtocpu));
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X));
    PetscCall(MatSetType(X,((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X,S->boundtocpu));
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatMatSolveTranspose(S,B,X));
    } else {
      PetscCall(MatMatSolve(S,B,X));
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    sizesol = mumps->id.nrhs*mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      PetscCall(PetscFree(mumps->schur_sol));
      PetscCall(PetscMalloc1(sizesol,&mumps->schur_sol));
      mumps->schur_sizesol = sizesol;
    }
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X));
    PetscCall(MatSetType(X,((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X,S->boundtocpu));
#endif
    PetscCall(MatProductCreateWithMat(S,B,NULL,X));
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatProductSetType(X,MATPRODUCT_AtB));
    } else {
      PetscCall(MatProductSetType(X,MATPRODUCT_AB));
    }
    PetscCall(MatProductSetFromOptions(X));
    PetscCall(MatProductSymbolic(X));
    PetscCall(MatProductNumeric(X));

    PetscCall(MatCopy(X,B,SAME_NONZERO_PATTERN));
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %d",F->schur_status);
  }
  PetscCall(MatFactorRestoreSchurComplement(F,&S,schurstatus));
  PetscCall(MatDestroy(&B));
  PetscCall(MatDestroy(&X));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS *mumps=(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when the Schur complement has not been computed */
    PetscFunctionReturn(0);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      PetscCall(PetscFree(mumps->id.redrhs));
      mumps->id.lredrhs = mumps->id.size_schur;
      PetscCall(PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs));
      mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs;
    }
    mumps->id.ICNTL(26) = 1; /* condensation phase */
  } else { /* prepare for the expansion step */
    /* solve the Schur complement (this has to be done by the MUMPS user, so basically us) */
    PetscCall(MatMumpsSolveSchur_Private(F));
    mumps->id.ICNTL(26) = 2; /* expansion phase */
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free the MUMPS internal array for redrhs if we have solved for multiple rhs, in order to save memory */
    if (mumps->id.nrhs > 1) {
      PetscCall(PetscFree(mumps->id.redrhs));
      mumps->id.lredrhs = 0;
      mumps->sizeredrhs = 0;
    }
  }
  PetscFunctionReturn(0);
}

/*
  MatConvertToTriples_A_B - convert a PETSc matrix to triples: row[nz], col[nz], val[nz]

  input:
    A       - matrix in aij, baij or sbaij format
    shift   - 0: C style output triple; 1: Fortran style output triple.
    reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
              MAT_REUSE_MATRIX: only the values in the v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and col indices, matrix values (matrix triples)

  The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() they are
  freed with PetscFree(mumps->irn). This is not ideal code; the fact that v is ONLY sometimes part of mumps->irn means
  that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
 */
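/* Added illustration (not from the original source): with shift = 1 (Fortran-style indices, what MUMPS expects),
   the 2x2 sequential AIJ matrix
       [ 2 -1 ]
       [ 0  3 ]
   is converted by MatConvertToTriples_seqaij_seqaij() below into
       nnz = 3,  r = {1,1,2},  c = {1,2,2},  v = {2,-1,3}
   i.e. one (row,col,value) triple per stored nonzero, in the row-major order of the AIJ storage. */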
PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscScalar *av;
  const PetscInt    *ai,*aj,*ajj,M=A->rmap->n;
  PetscInt64        nz,rnz,i,j,k;
  PetscMUMPSInt     *row,*col;
  Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArrayRead(A,&av));
  mumps->val = (PetscScalar*)av;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = aa->nz;
    ai = aa->i;
    aj = aa->j;
    PetscCall(PetscMalloc2(nz,&row,nz,&col));
    for (i=k=0; i<M; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        PetscCall(PetscMUMPSIntCast(i+shift,&row[k]));
        PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[k]));
        k++;
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(A,&av));
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  PetscInt64    nz,i,j,k,r;
  Mat_SeqSELL   *a=(Mat_SeqSELL*)A->data;
  PetscMUMPSInt *row,*col;

  PetscFunctionBegin;
  mumps->val = a->val;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = a->sliidx[a->totalslices];
    PetscCall(PetscMalloc2(nz,&row,nz,&col));
    for (i=k=0; i<a->totalslices; i++) {
      for (j=a->sliidx[i],r=0; j<a->sliidx[i+1]; j++,r=((r+1)&0x07)) {
        PetscCall(PetscMUMPSIntCast(8*i+r+shift,&row[k++]));
      }
    }
    for (i=0; i<nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i]+shift,&col[i]));
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  Mat_SeqBAIJ    *aa=(Mat_SeqBAIJ*)A->data;
  const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2;
  PetscInt64     M,nz,idx=0,rnz,i,j,k,m;
  PetscInt       bs;
  PetscMUMPSInt  *row,*col;

  PetscFunctionBegin;
  PetscCall(MatGetBlockSize(A,&bs));
  M = A->rmap->N/bs;
  mumps->val = aa->a;
  if (reuse == MAT_INITIAL_MATRIX) {
    ai = aa->i; aj = aa->j;
    nz = bs2*aa->nz;
    PetscCall(PetscMalloc2(nz,&row,nz,&col));
    for (i=0; i<M; i++) {
      ajj = aj + ai[i];
      rnz = ai[i+1] - ai[i];
      for (k=0; k<rnz; k++) {
        for (j=0; j<bs; j++) {
          for (m=0; m<bs; m++) {
            PetscCall(PetscMUMPSIntCast(i*bs + m + shift,&row[idx]));
            PetscCall(PetscMUMPSIntCast(bs*ajj[k] + j + shift,&col[idx]));
            idx++;
          }
        }
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt *ai,*aj,*ajj;
  PetscInt       bs;
  PetscInt64     nz,rnz,i,j,k,m;
  PetscMUMPSInt  *row,*col;
  PetscScalar    *val;
  Mat_SeqSBAIJ   *aa=(Mat_SeqSBAIJ*)A->data;
  const PetscInt bs2=aa->bs2,mbs=aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool      hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatGetOption(A,MAT_HERMITIAN,&hermitian));
  PetscCheck(!hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  ai = aa->i;
  aj = aa->j;
  PetscCall(MatGetBlockSize(A,&bs));
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = aa->nz;
    PetscCall(PetscMalloc2(bs2*nz,&row,bs2*nz,&col));
    if (bs>1) {
      PetscCall(PetscMalloc1(bs2*nz,&mumps->val_alloc));
      mumps->val = mumps->val_alloc;
    } else {
      mumps->val = aa->a;
    }
    mumps->irn = row;
    mumps->jcn = col;
  } else {
    if (bs == 1) mumps->val = aa->a;
    row = mumps->irn;
    col = mumps->jcn;
  }
  val = mumps->val;

  nz = 0;
  if (bs>1) {
    for (i=0; i<mbs; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            if (ajj[j]>i || k>=m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                PetscCall(PetscMUMPSIntCast(i*bs + m + shift,&row[nz]));
                PetscCall(PetscMUMPSIntCast(ajj[j]*bs + k + shift,&col[nz]));
              }
              val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs];
            }
          }
        }
      }
    }
  } else if (reuse == MAT_INITIAL_MATRIX) {
    for (i=0; i<mbs; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
        PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[nz]));
        nz++;
      }
    }
    PetscCheck(nz == aa->nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT,nz,aa->nz);
  }
  if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai,*aj,*ajj,*adiag,M=A->rmap->n;
  PetscInt64        nz,rnz,i,j;
  const PetscScalar *av,*v1;
  PetscScalar       *val;
  PetscMUMPSInt     *row,*col;
  Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
  PetscBool         missing;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatGetOption(A,MAT_HERMITIAN,&hermitian));
  PetscCheck(!hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatSeqAIJGetArrayRead(A,&av));
  ai    = aa->i; aj = aa->j;
  adiag = aa->diag;
  PetscCall(MatMissingDiagonal_SeqAIJ(A,&missing,NULL));
  if (reuse == MAT_INITIAL_MATRIX) {
    /* count nz in the upper triangular part of A */
    nz = 0;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i]; j<ai[i+1]; j++) {
            if (aj[j] < i) continue;
            nz++;
          }
        } else {
          nz += ai[i+1] - adiag[i];
        }
      }
    } else {
      for (i=0; i<M; i++) nz += ai[i+1] - adiag[i];
    }
    PetscCall(PetscMalloc2(nz,&row,nz,&col));
    PetscCall(PetscMalloc1(nz,&val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;

    nz = 0;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i]; j<ai[i+1]; j++) {
            if (aj[j] < i) continue;
            PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
            PetscCall(PetscMUMPSIntCast(aj[j]+shift,&col[nz]));
            val[nz] = av[j];
            nz++;
          }
        } else {
          rnz = ai[i+1] - adiag[i];
          ajj = aj + adiag[i];
          v1  = av + adiag[i];
          for (j=0; j<rnz; j++) {
            PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
            PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[nz]));
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i=0; i<M; i++) {
        rnz = ai[i+1] - adiag[i];
        ajj = aj + adiag[i];
        v1  = av + adiag[i];
        for (j=0; j<rnz; j++) {
          PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
          PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[nz]));
          val[nz++] = v1[j];
        }
      }
    }
  } else {
    nz  = 0;
    val = mumps->val;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i]; j<ai[i+1]; j++) {
            if (aj[j] < i) continue;
            val[nz++] = av[j];
          }
        } else {
          rnz = ai[i+1] - adiag[i];
          v1  = av + adiag[i];
          for (j=0; j<rnz; j++) {
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i=0; i<M; i++) {
        rnz = ai[i+1] - adiag[i];
        v1  = av + adiag[i];
        for (j=0; j<rnz; j++) {
          val[nz++] = v1[j];
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(A,&av));
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai,*aj,*bi,*bj,*garray,*ajj,*bjj;
  PetscInt          bs;
  PetscInt64        rstart,nz,i,j,k,m,jj,irow,countA,countB;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av,*bv,*v1,*v2;
  PetscScalar       *val;
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ*)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
  const PetscInt    bs2=aa->bs2,mbs=aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatGetOption(A,MAT_HERMITIAN,&hermitian));
  PetscCheck(!hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatGetBlockSize(A,&bs));
  rstart = A->rmap->rstart;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;
  av = aa->a;
  bv = bb->a;

  garray = mat->garray;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz = (aa->nz+bb->nz)*bs2; /* just a conservative estimate */
    PetscCall(PetscMalloc2(nz,&row,nz,&col));
    PetscCall(PetscMalloc1(nz,&val));
    /* cannot decide the exact mumps->nnz now because of the SBAIJ */
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<mbs; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i]*bs2;
    v2     = bv + bi[i]*bs2;

    if (bs>1) {
      /* A-part */
      for (j=0; j<countA; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            if (rstart + ajj[j]*bs>irow || k>=m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                PetscCall(PetscMUMPSIntCast(irow + m + shift,&row[jj]));
                PetscCall(PetscMUMPSIntCast(rstart + ajj[j]*bs + k + shift,&col[jj]));
              }
              val[jj++] = v1[j*bs2 + m + k*bs];
            }
          }
        }
      }

      /* B-part */
      for (j=0; j < countB; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            if (reuse == MAT_INITIAL_MATRIX) {
              PetscCall(PetscMUMPSIntCast(irow + m + shift,&row[jj]));
              PetscCall(PetscMUMPSIntCast(garray[bjj[j]]*bs + k + shift,&col[jj]));
            }
            val[jj++] = v2[j*bs2 + m + k*bs];
          }
        }
      }
    } else {
      /* A-part */
      for (j=0; j<countA; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
          PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]));
        }
        val[jj++] = v1[j];
      }

      /* B-part */
      for (j=0; j < countB; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
          PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]));
        }
        val[jj++] = v2[j];
      }
    }
    irow+=bs;
  }
  mumps->nnz = jj;
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai,*aj,*bi,*bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscInt64        rstart,nz,i,j,jj,irow,countA,countB;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av,*bv,*v1,*v2;
  PetscScalar       *val;
  Mat               Ad,Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray));
  PetscCall(MatSeqAIJGetArrayRead(Ad,&av));
  PetscCall(MatSeqAIJGetArrayRead(Ao,&bv));

  aa = (Mat_SeqAIJ*)(Ad)->data;
  bb = (Mat_SeqAIJ*)(Ao)->data;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
    PetscCall(PetscMalloc2(nz,&row,nz,&col));
    PetscCall(PetscMalloc1(nz,&val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
        PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]));
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j=0; j < countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
        PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]));
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Ad,&av));
  PetscCall(MatSeqAIJRestoreArrayRead(Ao,&bv));
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ*)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
  const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj,*bjj;
  const PetscInt    *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart;
  const PetscInt    bs2=mat->bs2;
  PetscInt          bs;
  PetscInt64        nz,i,j,k,n,jj,irow,countA,countB,idx;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2;
  PetscScalar       *val;

  PetscFunctionBegin;
  PetscCall(MatGetBlockSize(A,&bs));
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = bs2*(aa->nz + bb->nz);
    PetscCall(PetscMalloc2(nz,&row,nz,&col));
    PetscCall(PetscMalloc1(nz,&val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<mbs; i++) {
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    ajj    = aj + ai[i];
    bjj    = bj + bi[i];
    v1     = av + bs2*ai[i];
    v2     = bv + bs2*bi[i];

    idx = 0;
    /* A-part */
    for (k=0; k<countA; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift,&row[jj]));
            PetscCall(PetscMUMPSIntCast(rstart + bs*ajj[k] + j + shift,&col[jj]));
          }
          val[jj++] = v1[idx++];
        }
      }
    }

    idx = 0;
    /* B-part */
    for (k=0; k<countB; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift,&row[jj]));
            PetscCall(PetscMUMPSIntCast(bs*garray[bjj[k]] + j + shift,&col[jj]));
          }
          val[jj++] = v2[idx++];
        }
      }
    }
    irow += bs;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai,*aj,*adiag,*bi,*bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscInt64        rstart,nz,nza,nzb,i,j,jj,irow,countA,countB;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av,*bv,*v1,*v2;
  PetscScalar       *val;
  Mat               Ad,Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatGetOption(A,MAT_HERMITIAN,&hermitian));
  PetscCheck(!hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray));
  PetscCall(MatSeqAIJGetArrayRead(Ad,&av));
  PetscCall(MatSeqAIJGetArrayRead(Ao,&bv));

  aa    = (Mat_SeqAIJ*)(Ad)->data;
  bb    = (Mat_SeqAIJ*)(Ao)->data;
  ai    = aa->i;
  aj    = aa->j;
  adiag = aa->diag;
  bi    = bb->i;
  bj    = bb->j;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
    nzb = 0; /* num of upper triangular entries in mat->B */
    for (i=0; i<m; i++) {
      nza   += (ai[i+1] - adiag[i]);
      countB = bi[i+1] - bi[i];
      bjj    = bj + bi[i];
      for (j=0; j<countB; j++) {
        if (garray[bjj[j]] > rstart) nzb++;
      }
    }

    nz = nza + nzb; /* total nz of the upper triangular part of mat */
    PetscCall(PetscMalloc2(nz,&row,nz,&col));
    PetscCall(PetscMalloc1(nz,&val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
    v1     = av + adiag[i];
    countA = ai[i+1] - adiag[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
        PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]));
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j=0; j < countB; j++) {
      if (garray[bjj[j]] > rstart) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
          PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]));
        }
        val[jj++] = v2[j];
      }
    }
    irow++;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Ad,&av));
  PetscCall(MatSeqAIJRestoreArrayRead(Ao,&bv));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS *mumps=(Mat_MUMPS*)A->data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc));
  PetscCall(VecScatterDestroy(&mumps->scat_rhs));
  PetscCall(VecScatterDestroy(&mumps->scat_sol));
  PetscCall(VecDestroy(&mumps->b_seq));
  PetscCall(VecDestroy(&mumps->x_seq));
  PetscCall(PetscFree(mumps->id.perm_in));
  PetscCall(PetscFree2(mumps->irn,mumps->jcn));
  PetscCall(PetscFree(mumps->val_alloc));
  PetscCall(PetscFree(mumps->info));
  PetscCall(MatMumpsResetSchur_Private(mumps));
  mumps->id.job = JOB_END;
  PetscMUMPS_c(mumps);
  PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d",mumps->id.INFOG(1));
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  if (mumps->use_petsc_omp_support) {
    PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
    PetscCall(PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf));
    PetscCall(PetscFree3(mumps->rhs_nrow,mumps->rhs_recvcounts,mumps->rhs_disps));
  }
#endif
  PetscCall(PetscFree(mumps->ia_alloc));
  PetscCall(PetscFree(mumps->ja_alloc));
  PetscCall(PetscFree(mumps->recvcount));
  PetscCall(PetscFree(mumps->reqs));
  PetscCall(PetscFree(mumps->irhs_loc));
  if (mumps->mumps_comm != MPI_COMM_NULL) PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A),&mumps->mumps_comm));
  PetscCall(PetscFree(A->data));

  /* clear composed functions */
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL));
  PetscFunctionReturn(0);
}
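/* Added note (illustration, not from the original source): when the distributed dense RHS path is used
   (mumps->ICNTL20 == 10, see MatSolve_MUMPS() below), each rank hands MUMPS its local rows of the
   right-hand side through rhs_loc/irhs_loc, where irhs_loc[] holds 1-based *global* row indices.
   For example, with M = 5 rows split 3 + 2 over two ranks, rank 0 sets irhs_loc = {1,2,3} and
   rank 1 sets irhs_loc = {4,5}, which is exactly what the MatGetOwnershipRange() loop below produces. */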
/* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */
static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A,PetscInt nrhs,const PetscScalar *array)
{
  Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
  const PetscMPIInt ompsize=mumps->omp_comm_size;
  PetscInt          i,m,M,rstart;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A,&M,NULL));
  PetscCall(MatGetLocalSize(A,&m,NULL));
  PetscCheck(M <= PETSC_MUMPS_INT_MAX,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
  if (ompsize == 1) {
    if (!mumps->irhs_loc) {
      mumps->nloc_rhs = m;
      PetscCall(PetscMalloc1(m,&mumps->irhs_loc));
      PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
      for (i=0; i<m; i++) mumps->irhs_loc[i] = rstart+i+1; /* use 1-based indices */
    }
    mumps->id.rhs_loc = (MumpsScalar*)array;
  } else {
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
    const PetscInt *ranges;
    PetscMPIInt    j,k,sendcount,*petsc_ranks,*omp_ranks;
    MPI_Group      petsc_group,omp_group;
    PetscScalar    *recvbuf=NULL;

    if (mumps->is_omp_master) {
      /* Lazily initialize the omp stuff for distributed rhs */
      if (!mumps->irhs_loc) {
        PetscCall(PetscMalloc2(ompsize,&omp_ranks,ompsize,&petsc_ranks));
        PetscCall(PetscMalloc3(ompsize,&mumps->rhs_nrow,ompsize,&mumps->rhs_recvcounts,ompsize,&mumps->rhs_disps));
        PetscCallMPI(MPI_Comm_group(mumps->petsc_comm,&petsc_group));
        PetscCallMPI(MPI_Comm_group(mumps->omp_comm,&omp_group));
        for (j=0; j<ompsize; j++) omp_ranks[j] = j;
        PetscCallMPI(MPI_Group_translate_ranks(omp_group,ompsize,omp_ranks,petsc_group,petsc_ranks));

        /* Populate mumps->irhs_loc[], rhs_nrow[] */
        mumps->nloc_rhs = 0;
        PetscCall(MatGetOwnershipRanges(A,&ranges));
        for (j=0; j<ompsize; j++) {
          mumps->rhs_nrow[j] = ranges[petsc_ranks[j]+1] - ranges[petsc_ranks[j]];
          mumps->nloc_rhs   += mumps->rhs_nrow[j];
        }
        PetscCall(PetscMalloc1(mumps->nloc_rhs,&mumps->irhs_loc));
        for (j=k=0; j<ompsize; j++) {
          for (i=ranges[petsc_ranks[j]]; i<ranges[petsc_ranks[j]+1]; i++,k++) mumps->irhs_loc[k] = i+1; /* uses 1-based indices */
        }

        PetscCall(PetscFree2(omp_ranks,petsc_ranks));
        PetscCallMPI(MPI_Group_free(&petsc_group));
        PetscCallMPI(MPI_Group_free(&omp_group));
      }

      /* Realloc buffers when current nrhs is bigger than what we have met */
      if (nrhs > mumps->max_nrhs) {
        PetscCall(PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf));
        PetscCall(PetscMalloc2(mumps->nloc_rhs*nrhs,&mumps->rhs_loc,mumps->nloc_rhs*nrhs,&mumps->rhs_recvbuf));
        mumps->max_nrhs = nrhs;
      }

      /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
      for (j=0; j<ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j]*nrhs,&mumps->rhs_recvcounts[j]));
      mumps->rhs_disps[0] = 0;
      for (j=1; j<ompsize; j++) {
        mumps->rhs_disps[j] = mumps->rhs_disps[j-1] + mumps->rhs_recvcounts[j-1];
        PetscCheck(mumps->rhs_disps[j] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscMPIInt overflow!");
      }
      recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
    }

    PetscCall(PetscMPIIntCast(m*nrhs,&sendcount));
    PetscCallMPI(MPI_Gatherv(array,sendcount,MPIU_SCALAR,recvbuf,mumps->rhs_recvcounts,mumps->rhs_disps,MPIU_SCALAR,0,mumps->omp_comm));

    if (mumps->is_omp_master) {
      if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
        PetscScalar *dst,*dstbase = mumps->rhs_loc;
        for (j=0; j<ompsize; j++) {
          const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
          dst = dstbase;
          for (i=0; i<nrhs; i++) {
            PetscCall(PetscArraycpy(dst,src,mumps->rhs_nrow[j]));
            src += mumps->rhs_nrow[j];
            dst += mumps->nloc_rhs;
          }
          dstbase += mumps->rhs_nrow[j];
        }
      }
      mumps->id.rhs_loc = (MumpsScalar*)mumps->rhs_loc;
    }
#endif /* PETSC_HAVE_OPENMP_SUPPORT */
  }
  mumps->id.nrhs     = nrhs;
  mumps->id.nloc_rhs = mumps->nloc_rhs;
  mumps->id.lrhs_loc = mumps->nloc_rhs;
  mumps->id.irhs_loc = mumps->irhs_loc;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
  const PetscScalar *rarray = NULL;
  PetscScalar       *array;
  IS                is_iden,is_petsc;
  PetscInt          i;
  PetscBool         second_solve = PETSC_FALSE;
  static PetscBool  cite1 = PETSC_FALSE,cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1));
  PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",&cite2));

  if (A->factorerrortype) {
    PetscCall(PetscInfo(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
    PetscCall(VecSetInf(x));
    PetscFunctionReturn(0);
  }

  mumps->id.nrhs = 1;
  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      PetscCall(VecGetArrayRead(b,&rarray));
      PetscCall(MatMumpsSetUpDistRHSInfo(A,1,rarray));
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector */
      PetscCall(VecScatterBegin(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD));
      if (!mumps->myid) {
        PetscCall(VecGetArray(mumps->b_seq,&array));
        mumps->id.rhs = (MumpsScalar*)array;
      }
    }
  } else { /* petsc_size == 1 */
    mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
    PetscCall(VecCopy(b,x));
    PetscCall(VecGetArray(x,&array));
    mumps->id.rhs = (MumpsScalar*)array;
  }

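  /* Added summary (not from the original source) of the Schur-reduced solve sequence implemented via
     MatMumpsHandleSchur_Private() above: when a Schur complement was requested but the user left
     ICNTL(26) at its default, PETSc still returns the solution of the full system by
       1) calling MUMPS with ICNTL(26)=1 (condensation) to form the reduced right-hand side redrhs,
       2) solving the Schur system itself with MatMumpsSolveSchur_Private(),
       3) calling MUMPS again with ICNTL(26)=2 (expansion) to recover the remaining unknowns. */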
  /*
     handle condensation step of Schur complement (if any)
     We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
     According to the MUMPS (5.0.0) manual, any value should be harmless during the factorization phase.
     Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
     This requires an extra call to PetscMUMPS_c and the computation of the factors for S.
  */
  if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
    PetscCheck(mumps->petsc_size <= 1,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc");
    second_solve = PETSC_TRUE;
    PetscCall(MatMumpsHandleSchur_Private(A,PETSC_FALSE));
  }
  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));

  /* handle expansion step of Schur complement (if any) */
  if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A,PETSC_TRUE));

  if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
    if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
      /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), so recreate scat_sol */
      PetscCall(VecScatterDestroy(&mumps->scat_sol));
    }
    if (!mumps->scat_sol) { /* create scatter scat_sol */
      PetscInt *isol2_loc=NULL;
      PetscCall(ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden)); /* from */
      PetscCall(PetscMalloc1(mumps->id.lsol_loc,&isol2_loc));
      for (i=0; i<mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i]-1; /* change Fortran style to C style */
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,isol2_loc,PETSC_OWN_POINTER,&is_petsc)); /* to */
      PetscCall(VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol));
      PetscCall(ISDestroy(&is_iden));
      PetscCall(ISDestroy(&is_petsc));
      mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save the current value of id.ICNTL(9) */
    }

    PetscCall(VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD));
  }

  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      PetscCall(VecRestoreArrayRead(b,&rarray));
    } else if (!mumps->myid) {
      PetscCall(VecRestoreArray(mumps->b_seq,&array));
    }
  } else PetscCall(VecRestoreArray(x,&array));

  PetscCall(PetscLogFlops(2.0*mumps->id.RINFO(3)));
  PetscFunctionReturn(0);
}

PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS *mumps=(Mat_MUMPS*)A->data;

  PetscFunctionBegin;
  mumps->id.ICNTL(9) = 0;
  PetscCall(MatSolve_MUMPS(A,b,x));
  mumps->id.ICNTL(9) = 1;
  PetscFunctionReturn(0);
}

PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X)
{
  Mat               Bt = NULL;
  PetscBool         denseX,denseB,flg,flgT;
  Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
  PetscInt          i,nrhs,M;
  PetscScalar       *array;
  const PetscScalar *rbray;
  PetscInt          lsol_loc,nlsol_loc,*idxx,iidx = 0;
  PetscMUMPSInt     *isol_loc,*isol_loc_save;
  PetscScalar       *bray,*sol_loc,*sol_loc_save;
  IS                is_to,is_from;
  PetscInt          k,proc,j,m,myrstart;
  const PetscInt    *rstart;
  Vec               v_mpi,msol_loc;
  VecScatter        scat_sol;
  Vec               b_seq;
  VecScatter        scat_rhs;
  PetscScalar       *aa;
  PetscInt          spnr,*ia,*ja;
  Mat_MPIAIJ        *b = NULL;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)X,&denseX,MATSEQDENSE,MATMPIDENSE,NULL));
  PetscCheck(denseX,PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");

  PetscCall(PetscObjectTypeCompareAny((PetscObject)B,&denseB,MATSEQDENSE,MATMPIDENSE,NULL));
  if (denseB) {
    PetscCheck(B->rmap->n == X->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution");
    mumps->id.ICNTL(20) = 0; /* dense RHS */
  } else { /* sparse B */
    PetscCheck(X != B,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_IDN,"X and B must be different matrices");
    PetscCall(PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT));
    if (flgT) { /* input B is the transpose of the actual RHS matrix,
                   because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
      PetscCall(MatTransposeGetMat(B,&Bt));
    } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix");
    mumps->id.ICNTL(20) = 1; /* sparse RHS */
  }

  PetscCall(MatGetSize(B,&M,&nrhs));
  mumps->id.nrhs = nrhs;
  mumps->id.lrhs = M;
  mumps->id.rhs  = NULL;

  if (mumps->petsc_size == 1) {
    PetscScalar *aa;
    PetscInt    spnr,*ia,*ja;
    PetscBool   second_solve = PETSC_FALSE;

    PetscCall(MatDenseGetArray(X,&array));
    mumps->id.rhs = (MumpsScalar*)array;

    if (denseB) {
      /* copy B to X */
      PetscCall(MatDenseGetArrayRead(B,&rbray));
      PetscCall(PetscArraycpy(array,rbray,M*nrhs));
      PetscCall(MatDenseRestoreArrayRead(B,&rbray));
    } else { /* sparse B */
      PetscCall(MatSeqAIJGetArray(Bt,&aa));
      PetscCall(MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
      PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs));
      mumps->id.rhs_sparse = (MumpsScalar*)aa;
    }
    /* handle condensation step of Schur complement (if any) */
    if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
      second_solve = PETSC_TRUE;
      PetscCall(MatMumpsHandleSchur_Private(A,PETSC_FALSE));
    }
    /* solve phase */
    /*-------------*/
    mumps->id.job = JOB_SOLVE;
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));

    /* handle expansion step of Schur complement (if any) */
    if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A,PETSC_TRUE));
    if (!denseB) { /* sparse B */
      PetscCall(MatSeqAIJRestoreArray(Bt,&aa));
      PetscCall(MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
      PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
    PetscCall(MatDenseRestoreArray(X,&array));
    PetscFunctionReturn(0);
  }

  /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/
  PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19),PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc");

  /* create msol_loc to hold the mumps local solution */
  isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
  sol_loc_save  = (PetscScalar*)mumps->id.sol_loc;

  lsol_loc  = mumps->id.lsol_loc;
  nlsol_loc = nrhs*lsol_loc; /* length of sol_loc */
  PetscCall(PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc));
  mumps->id.sol_loc  = (MumpsScalar*)sol_loc;
  mumps->id.isol_loc = isol_loc;

  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc));

  if (denseB) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      PetscCall(MatDenseGetArrayRead(B,&rbray));
      PetscCall(MatMumpsSetUpDistRHSInfo(A,nrhs,rbray));
      PetscCall(MatDenseRestoreArrayRead(B,&rbray));
      PetscCall(MatGetLocalSize(B,&m,NULL));
      PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,NULL,&v_mpi));
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
      /* TODO: Because of non-contiguous indices, the vecscatter scat_rhs created below cannot be implemented as a simple
         MPI_Gather, resulting in very inefficient communication. An optimization is to use VecScatterCreateToZero to gather
         B to rank 0. Then on rank 0, re-arrange B into the desired order, which is a local operation.
       */

      /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
      /* wrap dense rhs matrix B into a vector v_mpi */
      PetscCall(MatGetLocalSize(B,&m,NULL));
      PetscCall(MatDenseGetArray(B,&bray));
      PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi));
      PetscCall(MatDenseRestoreArray(B,&bray));

      /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
      if (!mumps->myid) {
        PetscInt *idx;
        /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
        PetscCall(PetscMalloc1(nrhs*M,&idx));
        PetscCall(MatGetOwnershipRanges(B,&rstart));
        k = 0;
        for (proc=0; proc<mumps->petsc_size; proc++) {
          for (j=0; j<nrhs; j++) {
            for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i;
          }
        }

        PetscCall(VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq));
        PetscCall(ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to));
        PetscCall(ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from));
      } else {
        PetscCall(VecCreateSeq(PETSC_COMM_SELF,0,&b_seq));
        PetscCall(ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to));
        PetscCall(ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from));
      }
      PetscCall(VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs));
      PetscCall(VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(ISDestroy(&is_to));
      PetscCall(ISDestroy(&is_from));
      PetscCall(VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD));

      if (!mumps->myid) { /* define rhs on the host */
        PetscCall(VecGetArray(b_seq,&bray));
        mumps->id.rhs = (MumpsScalar*)bray;
        PetscCall(VecRestoreArray(b_seq,&bray));
      }
    }
  } else { /* sparse B */
    b = (Mat_MPIAIJ*)Bt->data;

    /* wrap dense X into a vector v_mpi */
    PetscCall(MatGetLocalSize(X,&m,NULL));
    PetscCall(MatDenseGetArray(X,&bray));
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi));
    PetscCall(MatDenseRestoreArray(X,&bray));

    if (!mumps->myid) {
      PetscCall(MatSeqAIJGetArray(b->A,&aa));
      PetscCall(MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
      PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs));
      mumps->id.rhs_sparse = (MumpsScalar*)aa;
    } else {
      mumps->id.irhs_ptr    = NULL;
      mumps->id.irhs_sparse = NULL;
      mumps->id.nz_rhs      = 0;
      mumps->id.rhs_sparse  = NULL;
    }
  }

  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));

  /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
  PetscCall(MatDenseGetArray(X,&array));
  PetscCall(VecPlaceArray(v_mpi,array));

  /* create scatter scat_sol */
  PetscCall(MatGetOwnershipRanges(X,&rstart));
  /* iidx: index for scattering the mumps solution to petsc X */

  PetscCall(ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from));
  PetscCall(PetscMalloc1(nlsol_loc,&idxx));
  for (i=0; i<lsol_loc; i++) {
    isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in the j-th vector */

    for (proc=0; proc<mumps->petsc_size; proc++) {
      if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) {
        myrstart = rstart[proc];
        k    = isol_loc[i] - myrstart;        /* local index on the 1st column of petsc vector X */
        iidx = k + myrstart*nrhs;             /* maps mumps isol_loc[i] to petsc index in X */
        m    = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */
        break;
      }
    }

    for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m;
  }
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to));
  PetscCall(VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol));
  PetscCall(VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall(ISDestroy(&is_from));
  PetscCall(ISDestroy(&is_to));
  PetscCall(VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall(MatDenseRestoreArray(X,&array));

  /* free spaces */
  mumps->id.sol_loc  = (MumpsScalar*)sol_loc_save;
  mumps->id.isol_loc = isol_loc_save;

  PetscCall(PetscFree2(sol_loc,isol_loc));
  PetscCall(PetscFree(idxx));
  PetscCall(VecDestroy(&msol_loc));
  PetscCall(VecDestroy(&v_mpi));
  if (!denseB) {
    if (!mumps->myid) {
      b = (Mat_MPIAIJ*)Bt->data;
      PetscCall(MatSeqAIJRestoreArray(b->A,&aa));
      PetscCall(MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
      PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
  } else {
    if (mumps->ICNTL20 == 0) {
      PetscCall(VecDestroy(&b_seq));
      PetscCall(VecScatterDestroy(&scat_rhs));
    }
  }
  PetscCall(VecScatterDestroy(&scat_sol));
  PetscCall(PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3)));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X)
{
  PetscBool flg;
  Mat       B;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL));
  PetscCheck(flg,PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix");

  /* Create B=Bt^T that uses Bt's data structure */
  PetscCall(MatCreateTranspose(Bt,&B));

  PetscCall(MatMatSolve_MUMPS(A,B,X));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(0);
}

#if !defined(PETSC_USE_COMPLEX)
/*
  input:
   F:     numeric factor
  output:
   nneg:  total number of negative pivots
   nzero: total number of zero pivots
   npos:  (global dimension of F) - nneg - nzero
*/
PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos)
{
  Mat_MUMPS   *mumps=(Mat_MUMPS*)F->data;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F),&size));
  /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
  PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia",mumps->id.INFOG(13));

  if (nneg) *nneg = mumps->id.INFOG(12);
  if (nzero || npos) {
    PetscCheck(mumps->id.ICNTL(24) == 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
    if (nzero) *nzero = mumps->id.INFOG(28);
    if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
  }
  PetscFunctionReturn(0);
}
#endif

PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps)
{
  PetscInt       i,nreqs;
  PetscMUMPSInt  *irn,*jcn;
  PetscMPIInt    count;
  PetscInt64     totnnz,remain;
  const PetscInt osize=mumps->omp_comm_size;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (osize > 1) {
    if (reuse == MAT_INITIAL_MATRIX) {
      /* master first gathers counts of nonzeros to receive */
      if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize,&mumps->recvcount));
      PetscCallMPI(MPI_Gather(&mumps->nnz,1,MPIU_INT64,mumps->recvcount,1,MPIU_INT64,0/*master*/,mumps->omp_comm));

      /* Then each rank computes its number of sends/recvs */
      if (mumps->is_omp_master) {
        /* Start from 1 since self communication is not done in MPI */
        nreqs = 0;
        for (i=1; i<osize; i++) nreqs += (mumps->recvcount[i]+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
      } else {
        nreqs = (mumps->nnz+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
      }
      PetscCall(PetscMalloc1(nreqs*3,&mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */

      /* The following code is doing a very simple thing: the omp_master rank gathers irn/jcn/val from the others.
         MPI_Gatherv would be enough if it supported big counts > 2^31-1. Since it does not, and mumps->nnz
         might be a prime number > 2^31-1, we have to slice the message. Note that omp_comm_size
         is very small, so the current approach should have no extra overhead compared to MPI_Gatherv.
       */
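      /* Added worked example (not from the original source): with PETSC_MPI_INT_MAX = 2^31-1 = 2147483647
         and a remote rank contributing recvcount[i] = 5,000,000,000 nonzeros, the while-loop below posts
         three receives per array, of sizes 2147483647, 2147483647 and 705032706, advancing the irn/jcn/val
         pointers by the same amounts, so arbitrarily large nnz counts are handled with 32-bit MPI counts. */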
1542 */ 1543 nreqs = 0; /* counter for actual send/recvs */ 1544 if (mumps->is_omp_master) { 1545 for (i=0,totnnz=0; i<osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1546 PetscCall(PetscMalloc2(totnnz,&irn,totnnz,&jcn)); 1547 PetscCall(PetscMalloc1(totnnz,&val)); 1548 1549 /* Self communication */ 1550 PetscCall(PetscArraycpy(irn,mumps->irn,mumps->nnz)); 1551 PetscCall(PetscArraycpy(jcn,mumps->jcn,mumps->nnz)); 1552 PetscCall(PetscArraycpy(val,mumps->val,mumps->nnz)); 1553 1554 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1555 PetscCall(PetscFree2(mumps->irn,mumps->jcn)); 1556 PetscCall(PetscFree(mumps->val_alloc)); 1557 mumps->nnz = totnnz; 1558 mumps->irn = irn; 1559 mumps->jcn = jcn; 1560 mumps->val = mumps->val_alloc = val; 1561 1562 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1563 jcn += mumps->recvcount[0]; 1564 val += mumps->recvcount[0]; 1565 1566 /* Remote communication */ 1567 for (i=1; i<osize; i++) { 1568 count = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX); 1569 remain = mumps->recvcount[i] - count; 1570 while (count>0) { 1571 PetscCallMPI(MPI_Irecv(irn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1572 PetscCallMPI(MPI_Irecv(jcn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1573 PetscCallMPI(MPI_Irecv(val,count,MPIU_SCALAR, i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1574 irn += count; 1575 jcn += count; 1576 val += count; 1577 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1578 remain -= count; 1579 } 1580 } 1581 } else { 1582 irn = mumps->irn; 1583 jcn = mumps->jcn; 1584 val = mumps->val; 1585 count = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX); 1586 remain = mumps->nnz - count; 1587 while (count>0) { 1588 PetscCallMPI(MPI_Isend(irn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1589 PetscCallMPI(MPI_Isend(jcn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1590 PetscCallMPI(MPI_Isend(val,count,MPIU_SCALAR, 0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1591 irn += count; 1592 jcn += count; 1593 val += count; 1594 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1595 remain -= count; 1596 } 1597 } 1598 } else { 1599 nreqs = 0; 1600 if (mumps->is_omp_master) { 1601 val = mumps->val + mumps->recvcount[0]; 1602 for (i=1; i<osize; i++) { /* Remote communication only since self data is already in place */ 1603 count = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX); 1604 remain = mumps->recvcount[i] - count; 1605 while (count>0) { 1606 PetscCallMPI(MPI_Irecv(val,count,MPIU_SCALAR,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1607 val += count; 1608 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1609 remain -= count; 1610 } 1611 } 1612 } else { 1613 val = mumps->val; 1614 count = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX); 1615 remain = mumps->nnz - count; 1616 while (count>0) { 1617 PetscCallMPI(MPI_Isend(val,count,MPIU_SCALAR,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1618 val += count; 1619 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1620 remain -= count; 1621 } 1622 } 1623 } 1624 PetscCallMPI(MPI_Waitall(nreqs,mumps->reqs,MPI_STATUSES_IGNORE)); 1625 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1626 } 1627 PetscFunctionReturn(0); 1628 } 1629 1630 PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info) 1631 { 1632 Mat_MUMPS *mumps =(Mat_MUMPS*)(F)->data; 1633 PetscBool isMPIAIJ; 1634 1635 
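  /* Reader's note (added summary, not part of the MUMPS interface logic itself): this routine re-extracts
     the (i,j,v) triplets from A, regathers them on the OpenMP master ranks when -mat_mumps_use_omp_threads
     is in effect, runs the MUMPS numeric factorization (JOB_FACTNUMERIC), maps negative INFOG(1) codes to
     PETSc MatFactorError types, and finally (re)creates the sequential vector x_seq that receives MUMPS'
     distributed solution in MatSolve_MUMPS. */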
  PetscFunctionBegin;
1636   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) { /* skip the error path when the matrix is merely empty (INFOG(1)=-16 with INFO(2)=N=0) */
1637     if (mumps->id.INFOG(1) == -6) {
1638       PetscCall(PetscInfo(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1639     }
1640     PetscCall(PetscInfo(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1641     PetscFunctionReturn(0);
1642   }
1643
1644   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
1645   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps));
1646
1647   /* numerical factorization phase */
1648   /*-------------------------------*/
1649   mumps->id.job = JOB_FACTNUMERIC;
1650   if (!mumps->id.ICNTL(18)) { /* A is centralized */
1651     if (!mumps->myid) {
1652       mumps->id.a = (MumpsScalar*)mumps->val;
1653     }
1654   } else {
1655     mumps->id.a_loc = (MumpsScalar*)mumps->val;
1656   }
1657   PetscMUMPS_c(mumps);
1658   if (mumps->id.INFOG(1) < 0) {
1659     if (A->erroriffailure) {
1660       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d",mumps->id.INFOG(1),mumps->id.INFO(2));
1661     } else {
1662       if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
1663         PetscCall(PetscInfo(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1664         F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1665       } else if (mumps->id.INFOG(1) == -13) {
1666         PetscCall(PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1667         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1668       } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
1669         PetscCall(PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with work array\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1670         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1671       } else {
1672         PetscCall(PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1673         F->factorerrortype = MAT_FACTOR_OTHER;
1674       }
1675     }
1676   }
1677   PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0,PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d",mumps->id.INFOG(16));
1678
1679   F->assembled = PETSC_TRUE;
1680
1681   if (F->schur) { /* reset Schur status to unfactored */
1682 #if defined(PETSC_HAVE_CUDA)
1683     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
1684 #endif
1685     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
1686       mumps->id.ICNTL(19) = 2;
1687       PetscCall(MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur));
1688     }
1689     PetscCall(MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED));
1690   }
1691
1692   /* just to be sure that the ICNTL(19) value returned by a call to MatMumpsGetIcntl is always consistent */
1693   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
1694
1695   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
1696   if (mumps->petsc_size > 1) {
1697     PetscInt    lsol_loc;
1698     PetscScalar *sol_loc;
1699
1700     PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ));
1701
1702     /* distributed solution; Create x_seq=sol_loc for repeated use */
1703     if (mumps->x_seq) {
1704       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1705
PetscCall(PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc)); 1706 PetscCall(VecDestroy(&mumps->x_seq)); 1707 } 1708 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1709 PetscCall(PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc)); 1710 mumps->id.lsol_loc = lsol_loc; 1711 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1712 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq)); 1713 } 1714 PetscCall(PetscLogFlops(mumps->id.RINFO(2))); 1715 PetscFunctionReturn(0); 1716 } 1717 1718 /* Sets MUMPS options from the options database */ 1719 PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 1720 { 1721 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1722 PetscMUMPSInt icntl=0; 1723 PetscInt info[80],i,ninfo=80,rbs,cbs; 1724 PetscBool flg=PETSC_FALSE; 1725 1726 PetscFunctionBegin; 1727 PetscOptionsBegin(PetscObjectComm((PetscObject)F),((PetscObject)F)->prefix,"MUMPS Options","Mat"); 1728 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg)); 1729 if (flg) mumps->id.ICNTL(1) = icntl; 1730 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg)); 1731 if (flg) mumps->id.ICNTL(2) = icntl; 1732 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg)); 1733 if (flg) mumps->id.ICNTL(3) = icntl; 1734 1735 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg)); 1736 if (flg) mumps->id.ICNTL(4) = icntl; 1737 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 1738 1739 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg)); 1740 if (flg) mumps->id.ICNTL(6) = icntl; 1741 1742 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis. 
0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)","None",mumps->id.ICNTL(7),&icntl,&flg)); 1743 if (flg) { 1744 PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 1745 mumps->id.ICNTL(7) = icntl; 1746 } 1747 1748 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL)); 1749 /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 1750 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL)); 1751 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL)); 1752 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL)); 1753 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL)); 1754 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL)); 1755 PetscCall(MatGetBlockSizes(A,&rbs,&cbs)); 1756 if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs; 1757 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15","ICNTL(15): compression of the input matrix resulting from a block format","None",mumps->id.ICNTL(15),&mumps->id.ICNTL(15),&flg)); 1758 if (flg) { 1759 PetscCheck(mumps->id.ICNTL(15) <= 0,PETSC_COMM_SELF,PETSC_ERR_SUP,"Positive -mat_mumps_icntl_15 not handled"); 1760 PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 1761 } 1762 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL)); 1763 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 1764 PetscCall(MatDestroy(&F->schur)); 1765 PetscCall(MatMumpsResetSchur_Private(mumps)); 1766 } 1767 1768 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 1769 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 1770 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 1771 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 1772 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 1773 In short, we could not use distributed RHS with MPICH until v4.0b1. 
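     Consequently the default chosen below is ICNTL(20)=10 (distributed dense RHS) only when MUMPS >= 5.3.0
     is used with an MPI library that is not an affected MPICH; a user can still select the centralized path
     at runtime with, e.g., -mat_mumps_icntl_20 0 (this interface only accepts the values 0 and 10).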
1774 */
1775 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101))
1776   mumps->ICNTL20 = 0;  /* Centralized dense RHS */
1777 #else
1778   mumps->ICNTL20 = 10; /* Distributed dense RHS */
1779 #endif
1780   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20","ICNTL(20): give MUMPS centralized (0) or distributed (10) dense right-hand sides","None",mumps->ICNTL20,&mumps->ICNTL20,&flg));
1781   PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0,PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10",(int)mumps->ICNTL20);
1782 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0)
1783   PetscCheck(!flg || mumps->ICNTL20 != 10,PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=10 is not supported before MUMPS-5.3.0");
1784 #endif
1785   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vectors */
1786
1787   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL));
1788   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL));
1789   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL));
1790   if (mumps->id.ICNTL(24)) {
1791     mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */
1792   }
1793
1794   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL));
1795   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix is present","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL));
1796   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27","ICNTL(27): controls the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL));
1797   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL));
1798   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL));
1799   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
1800   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL));
1801   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */
1802   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL));
1803
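  /* The next three ICNTL options plus CNTL(7) further below control MUMPS' Block Low Rank (BLR) feature.
     As an illustrative (not prescriptive) example, an approximate BLR factorization with a dropping
     tolerance of 1e-8 could be requested from the command line with
         -mat_mumps_icntl_35 1 -mat_mumps_cntl_7 1e-8
     where the tolerance is only a sample value; consult the MUMPS users' guide for suitable settings. */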
PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL)); 1804 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL)); 1805 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL)); 1806 1807 PetscCall(PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL)); 1808 PetscCall(PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL)); 1809 PetscCall(PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL)); 1810 PetscCall(PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL)); 1811 PetscCall(PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL)); 1812 PetscCall(PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL)); 1813 1814 PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL)); 1815 1816 PetscCall(PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL)); 1817 if (ninfo) { 1818 PetscCheck(ninfo <= 80,PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %" PetscInt_FMT " must <= 80",ninfo); 1819 PetscCall(PetscMalloc1(ninfo,&mumps->info)); 1820 mumps->ninfo = ninfo; 1821 for (i=0; i<ninfo; i++) { 1822 PetscCheck(info[i] >= 0 && info[i] <= 80,PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %" PetscInt_FMT " must between 1 and 80",ninfo); 1823 mumps->info[i] = info[i]; 1824 } 1825 } 1826 1827 PetscOptionsEnd(); 1828 PetscFunctionReturn(0); 1829 } 1830 1831 PetscErrorCode MatSetFromOptions_MUMPS_OpenMP(Mat F,Mat A) 1832 { 1833 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1834 PetscInt nthreads=0; 1835 1836 PetscFunctionBegin; 1837 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1838 PetscCallMPI(MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size)); 1839 PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm,&mumps->myid));/* "if (!myid)" still works even if mumps_comm is different */ 1840 1841 PetscCall(PetscOptionsHasName(NULL,((PetscObject)F)->prefix,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support)); 1842 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1843 PetscCall(PetscOptionsGetInt(NULL,((PetscObject)F)->prefix,"-mat_mumps_use_omp_threads",&nthreads,NULL)); 1844 if (mumps->use_petsc_omp_support) { 1845 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1846 PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl)); 1847 PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master)); 1848 #else 1849 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. 
Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",((PetscObject)A)->prefix?((PetscObject)A)->prefix:"");
1850 #endif
1851   } else {
1852     mumps->omp_comm      = PETSC_COMM_SELF;
1853     mumps->mumps_comm    = mumps->petsc_comm;
1854     mumps->is_omp_master = PETSC_TRUE;
1855   }
1856   PetscCallMPI(MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size));
1857   mumps->reqs = NULL;
1858   mumps->tag  = 0;
1859
1860   /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
1861   if (mumps->mumps_comm != MPI_COMM_NULL) {
1862     PetscCall(PetscCommGetComm(PetscObjectComm((PetscObject)A),&mumps->mumps_comm));
1863   }
1864
1865   mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
1866   mumps->id.job = JOB_INIT;
1867   mumps->id.par = 1; /* host participates in factorization and solve */
1868   mumps->id.sym = mumps->sym;
1869
1870   PetscMUMPS_c(mumps);
1871   PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS: INFOG(1)=%d",mumps->id.INFOG(1));
1872
1873   /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
1874      For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
1875   */
1876   PetscCallMPI(MPI_Bcast(mumps->id.icntl,40,MPI_INT,  0,mumps->omp_comm));
1877   PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm));
1878
1879   mumps->scat_rhs = NULL;
1880   mumps->scat_sol = NULL;
1881
1882   /* set PETSc-MUMPS default options - override MUMPS default */
1883   mumps->id.ICNTL(3) = 0;
1884   mumps->id.ICNTL(4) = 0;
1885   if (mumps->petsc_size == 1) {
1886     mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
1887     mumps->id.ICNTL(7)  = 7; /* automatic choice of ordering done by the package */
1888   } else {
1889     mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
1890     mumps->id.ICNTL(21) = 1; /* distributed solution */
1891   }
1892
1893   /* schur */
1894   mumps->id.size_schur    = 0;
1895   mumps->id.listvar_schur = NULL;
1896   mumps->id.schur         = NULL;
1897   mumps->sizeredrhs       = 0;
1898   mumps->schur_sol        = NULL;
1899   mumps->schur_sizesol    = 0;
1900   PetscFunctionReturn(0);
1901 }
1902
1903 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps)
1904 {
1905   PetscFunctionBegin;
1906   if (mumps->id.INFOG(1) < 0) {
1907     if (A->erroriffailure) {
1908       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d",mumps->id.INFOG(1));
1909     } else {
1910       if (mumps->id.INFOG(1) == -6) {
1911         PetscCall(PetscInfo(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1912         F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
1913       } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
1914         PetscCall(PetscInfo(F,"problem with workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1915         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1916       } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) { /* INFOG(1)=-16 with INFO(2)=N=0: empty matrix */
1917         PetscCall(PetscInfo(F,"Empty matrix\n"));
1918       } else {
1919         PetscCall(PetscInfo(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1920         F->factorerrortype = MAT_FACTOR_OTHER;
1921       }
1922     }
1923   }
1924   PetscFunctionReturn(0);
1925 }
1926
1927 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1928 {
1929
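  /* A minimal usage sketch (illustrative only, assuming a PETSc build configured with --download-mumps):
     this routine is reached either through the factor API,
         MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);
         MatLUFactorSymbolic(F,A,r,c,&info);
     or, more commonly, through KSP/PC with the runtime options
         -pc_type lu -pc_factor_mat_solver_type mumps
     after which MatFactorNumeric_MUMPS() and MatSolve_MUMPS() complete the solve. */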
Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1930 Vec b; 1931 const PetscInt M = A->rmap->N; 1932 1933 PetscFunctionBegin; 1934 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 1935 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 1936 PetscFunctionReturn(0); 1937 } 1938 1939 /* Set MUMPS options from the options database */ 1940 PetscCall(MatSetFromOptions_MUMPS(F,A)); 1941 1942 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 1943 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps)); 1944 1945 /* analysis phase */ 1946 /*----------------*/ 1947 mumps->id.job = JOB_FACTSYMBOLIC; 1948 mumps->id.n = M; 1949 switch (mumps->id.ICNTL(18)) { 1950 case 0: /* centralized assembled matrix input */ 1951 if (!mumps->myid) { 1952 mumps->id.nnz = mumps->nnz; 1953 mumps->id.irn = mumps->irn; 1954 mumps->id.jcn = mumps->jcn; 1955 if (mumps->id.ICNTL(6)>1) mumps->id.a = (MumpsScalar*)mumps->val; 1956 if (r) { 1957 mumps->id.ICNTL(7) = 1; 1958 if (!mumps->myid) { 1959 const PetscInt *idx; 1960 PetscInt i; 1961 1962 PetscCall(PetscMalloc1(M,&mumps->id.perm_in)); 1963 PetscCall(ISGetIndices(r,&idx)); 1964 for (i=0; i<M; i++) PetscCall(PetscMUMPSIntCast(idx[i]+1,&(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */ 1965 PetscCall(ISRestoreIndices(r,&idx)); 1966 } 1967 } 1968 } 1969 break; 1970 case 3: /* distributed assembled matrix input (size>1) */ 1971 mumps->id.nnz_loc = mumps->nnz; 1972 mumps->id.irn_loc = mumps->irn; 1973 mumps->id.jcn_loc = mumps->jcn; 1974 if (mumps->id.ICNTL(6)>1) mumps->id.a_loc = (MumpsScalar*)mumps->val; 1975 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1976 PetscCall(MatCreateVecs(A,NULL,&b)); 1977 PetscCall(VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq)); 1978 PetscCall(VecDestroy(&b)); 1979 } 1980 break; 1981 } 1982 PetscMUMPS_c(mumps); 1983 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps)); 1984 1985 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1986 F->ops->solve = MatSolve_MUMPS; 1987 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1988 F->ops->matsolve = MatMatSolve_MUMPS; 1989 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1990 1991 mumps->matstruc = SAME_NONZERO_PATTERN; 1992 PetscFunctionReturn(0); 1993 } 1994 1995 /* Note the Petsc r and c permutations are ignored */ 1996 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1997 { 1998 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1999 Vec b; 2000 const PetscInt M = A->rmap->N; 2001 2002 PetscFunctionBegin; 2003 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2004 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2005 PetscFunctionReturn(0); 2006 } 2007 2008 /* Set MUMPS options from the options database */ 2009 PetscCall(MatSetFromOptions_MUMPS(F,A)); 2010 2011 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2012 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps)); 2013 2014 /* analysis phase */ 2015 /*----------------*/ 2016 mumps->id.job = JOB_FACTSYMBOLIC; 2017 mumps->id.n = M; 2018 switch (mumps->id.ICNTL(18)) { 2019 case 0: /* centralized assembled matrix input */ 2020 if (!mumps->myid) { 2021 mumps->id.nnz = mumps->nnz; 2022 mumps->id.irn = mumps->irn; 2023 mumps->id.jcn = mumps->jcn; 2024 if (mumps->id.ICNTL(6)>1) { 2025 mumps->id.a = (MumpsScalar*)mumps->val; 2026 } 2027 } 2028 break; 2029 case 3: /* distributed assembled matrix input (size>1) */ 
2030 mumps->id.nnz_loc = mumps->nnz; 2031 mumps->id.irn_loc = mumps->irn; 2032 mumps->id.jcn_loc = mumps->jcn; 2033 if (mumps->id.ICNTL(6)>1) { 2034 mumps->id.a_loc = (MumpsScalar*)mumps->val; 2035 } 2036 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2037 PetscCall(MatCreateVecs(A,NULL,&b)); 2038 PetscCall(VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq)); 2039 PetscCall(VecDestroy(&b)); 2040 } 2041 break; 2042 } 2043 PetscMUMPS_c(mumps); 2044 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps)); 2045 2046 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2047 F->ops->solve = MatSolve_MUMPS; 2048 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2049 2050 mumps->matstruc = SAME_NONZERO_PATTERN; 2051 PetscFunctionReturn(0); 2052 } 2053 2054 /* Note the Petsc r permutation and factor info are ignored */ 2055 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info) 2056 { 2057 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 2058 Vec b; 2059 const PetscInt M = A->rmap->N; 2060 2061 PetscFunctionBegin; 2062 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2063 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2064 PetscFunctionReturn(0); 2065 } 2066 2067 /* Set MUMPS options from the options database */ 2068 PetscCall(MatSetFromOptions_MUMPS(F,A)); 2069 2070 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2071 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps)); 2072 2073 /* analysis phase */ 2074 /*----------------*/ 2075 mumps->id.job = JOB_FACTSYMBOLIC; 2076 mumps->id.n = M; 2077 switch (mumps->id.ICNTL(18)) { 2078 case 0: /* centralized assembled matrix input */ 2079 if (!mumps->myid) { 2080 mumps->id.nnz = mumps->nnz; 2081 mumps->id.irn = mumps->irn; 2082 mumps->id.jcn = mumps->jcn; 2083 if (mumps->id.ICNTL(6)>1) { 2084 mumps->id.a = (MumpsScalar*)mumps->val; 2085 } 2086 } 2087 break; 2088 case 3: /* distributed assembled matrix input (size>1) */ 2089 mumps->id.nnz_loc = mumps->nnz; 2090 mumps->id.irn_loc = mumps->irn; 2091 mumps->id.jcn_loc = mumps->jcn; 2092 if (mumps->id.ICNTL(6)>1) { 2093 mumps->id.a_loc = (MumpsScalar*)mumps->val; 2094 } 2095 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2096 PetscCall(MatCreateVecs(A,NULL,&b)); 2097 PetscCall(VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq)); 2098 PetscCall(VecDestroy(&b)); 2099 } 2100 break; 2101 } 2102 PetscMUMPS_c(mumps); 2103 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps)); 2104 2105 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2106 F->ops->solve = MatSolve_MUMPS; 2107 F->ops->solvetranspose = MatSolve_MUMPS; 2108 F->ops->matsolve = MatMatSolve_MUMPS; 2109 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2110 #if defined(PETSC_USE_COMPLEX) 2111 F->ops->getinertia = NULL; 2112 #else 2113 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2114 #endif 2115 2116 mumps->matstruc = SAME_NONZERO_PATTERN; 2117 PetscFunctionReturn(0); 2118 } 2119 2120 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) 2121 { 2122 PetscBool iascii; 2123 PetscViewerFormat format; 2124 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 2125 2126 PetscFunctionBegin; 2127 /* check if matrix is mumps type */ 2128 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0); 2129 2130 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 2131 if (iascii) { 2132 PetscCall(PetscViewerGetFormat(viewer,&format)); 2133 if (format == PETSC_VIEWER_ASCII_INFO) { 2134 PetscCall(PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n")); 2135 PetscCall(PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d\n",mumps->id.sym)); 2136 PetscCall(PetscViewerASCIIPrintf(viewer," PAR (host participation): %d\n",mumps->id.par)); 2137 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d\n",mumps->id.ICNTL(1))); 2138 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d\n",mumps->id.ICNTL(2))); 2139 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d\n",mumps->id.ICNTL(3))); 2140 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d\n",mumps->id.ICNTL(4))); 2141 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d\n",mumps->id.ICNTL(5))); 2142 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d\n",mumps->id.ICNTL(6))); 2143 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d\n",mumps->id.ICNTL(7))); 2144 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d\n",mumps->id.ICNTL(8))); 2145 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d\n",mumps->id.ICNTL(10))); 2146 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d\n",mumps->id.ICNTL(11))); 2147 if (mumps->id.ICNTL(11)>0) { 2148 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4))); 2149 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5))); 2150 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6))); 2151 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8))); 2152 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9))); 2153 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11))); 2154 } 2155 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d\n",mumps->id.ICNTL(12))); 2156 
PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(13) (sequential factorization of the root node): %d\n",mumps->id.ICNTL(13))); 2157 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d\n",mumps->id.ICNTL(14))); 2158 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(15) (compression of the input matrix): %d\n",mumps->id.ICNTL(15))); 2159 /* ICNTL(15-17) not used */ 2160 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d\n",mumps->id.ICNTL(18))); 2161 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d\n",mumps->id.ICNTL(19))); 2162 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(20) (RHS sparse pattern): %d\n",mumps->id.ICNTL(20))); 2163 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d\n",mumps->id.ICNTL(21))); 2164 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d\n",mumps->id.ICNTL(22))); 2165 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory can be allocated locally):%d\n",mumps->id.ICNTL(23))); 2166 2167 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d\n",mumps->id.ICNTL(24))); 2168 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d\n",mumps->id.ICNTL(25))); 2169 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for RHS or solution): %d\n",mumps->id.ICNTL(26))); 2170 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(27) (blocking size for multiple RHS): %d\n",mumps->id.ICNTL(27))); 2171 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d\n",mumps->id.ICNTL(28))); 2172 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d\n",mumps->id.ICNTL(29))); 2173 2174 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d\n",mumps->id.ICNTL(30))); 2175 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(31) (factors is discarded in the solve phase): %d\n",mumps->id.ICNTL(31))); 2176 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d\n",mumps->id.ICNTL(33))); 2177 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d\n",mumps->id.ICNTL(35))); 2178 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(36) (choice of BLR factorization variant): %d\n",mumps->id.ICNTL(36))); 2179 PetscCall(PetscViewerASCIIPrintf(viewer," ICNTL(38) (estimated compression rate of LU factors): %d\n",mumps->id.ICNTL(38))); 2180 2181 PetscCall(PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1))); 2182 PetscCall(PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2))); 2183 PetscCall(PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3))); 2184 PetscCall(PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4))); 2185 PetscCall(PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5))); 2186 PetscCall(PetscViewerASCIIPrintf(viewer," CNTL(7) (dropping parameter for BLR): %g \n",mumps->id.CNTL(7))); 2187 2188 /* information local to each processor */ 2189 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n")); 2190 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 2191 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g 
\n",mumps->myid,mumps->id.RINFO(1))); 2192 PetscCall(PetscViewerFlush(viewer)); 2193 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n")); 2194 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(2))); 2195 PetscCall(PetscViewerFlush(viewer)); 2196 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n")); 2197 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(3))); 2198 PetscCall(PetscViewerFlush(viewer)); 2199 2200 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n")); 2201 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(15))); 2202 PetscCall(PetscViewerFlush(viewer)); 2203 2204 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n")); 2205 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(16))); 2206 PetscCall(PetscViewerFlush(viewer)); 2207 2208 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n")); 2209 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(23))); 2210 PetscCall(PetscViewerFlush(viewer)); 2211 2212 if (mumps->ninfo && mumps->ninfo <= 80) { 2213 PetscInt i; 2214 for (i=0; i<mumps->ninfo; i++) { 2215 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "): \n",mumps->info[i])); 2216 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(mumps->info[i]))); 2217 PetscCall(PetscViewerFlush(viewer)); 2218 } 2219 } 2220 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 2221 2222 if (!mumps->myid) { /* information from the host */ 2223 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1))); 2224 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2))); 2225 PetscCall(PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3))); 2226 PetscCall(PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34))); 2227 2228 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(3))); 2229 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(4))); 2230 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d\n",mumps->id.INFOG(5))); 2231 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d\n",mumps->id.INFOG(6))); 2232 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively used after analysis): %d\n",mumps->id.INFOG(7))); 2233 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n",mumps->id.INFOG(8))); 2234 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(9) 
(total real/complex workspace to store the matrix factors after factorization): %d\n",mumps->id.INFOG(9))); 2235 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d\n",mumps->id.INFOG(10))); 2236 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d\n",mumps->id.INFOG(11))); 2237 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d\n",mumps->id.INFOG(12))); 2238 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d\n",mumps->id.INFOG(13))); 2239 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d\n",mumps->id.INFOG(14))); 2240 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d\n",mumps->id.INFOG(15))); 2241 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n",mumps->id.INFOG(16))); 2242 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n",mumps->id.INFOG(17))); 2243 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n",mumps->id.INFOG(18))); 2244 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n",mumps->id.INFOG(19))); 2245 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d\n",mumps->id.INFOG(20))); 2246 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n",mumps->id.INFOG(21))); 2247 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n",mumps->id.INFOG(22))); 2248 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n",mumps->id.INFOG(23))); 2249 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n",mumps->id.INFOG(24))); 2250 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n",mumps->id.INFOG(25))); 2251 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28))); 2252 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29))); 2253 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31))); 2254 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32))); 2255 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33))); 2256 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34))); 2257 
PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35))); 2258 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(36))); 2259 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n",mumps->id.INFOG(37))); 2260 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(38))); 2261 PetscCall(PetscViewerASCIIPrintf(viewer," INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n",mumps->id.INFOG(39))); 2262 } 2263 } 2264 } 2265 PetscFunctionReturn(0); 2266 } 2267 2268 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info) 2269 { 2270 Mat_MUMPS *mumps =(Mat_MUMPS*)A->data; 2271 2272 PetscFunctionBegin; 2273 info->block_size = 1.0; 2274 info->nz_allocated = mumps->id.INFOG(20); 2275 info->nz_used = mumps->id.INFOG(20); 2276 info->nz_unneeded = 0.0; 2277 info->assemblies = 0.0; 2278 info->mallocs = 0.0; 2279 info->memory = 0.0; 2280 info->fill_ratio_given = 0; 2281 info->fill_ratio_needed = 0; 2282 info->factor_mallocs = 0; 2283 PetscFunctionReturn(0); 2284 } 2285 2286 /* -------------------------------------------------------------------------------------------*/ 2287 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 2288 { 2289 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2290 const PetscScalar *arr; 2291 const PetscInt *idxs; 2292 PetscInt size,i; 2293 2294 PetscFunctionBegin; 2295 PetscCall(ISGetLocalSize(is,&size)); 2296 if (mumps->petsc_size > 1) { 2297 PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */ 2298 2299 ls = mumps->myid ? (size ? 
PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 2300 PetscCallMPI(MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm)); 2301 PetscCheck(gs,PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc"); 2302 } 2303 2304 /* Schur complement matrix */ 2305 PetscCall(MatDestroy(&F->schur)); 2306 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur)); 2307 PetscCall(MatDenseGetArrayRead(F->schur,&arr)); 2308 mumps->id.schur = (MumpsScalar*)arr; 2309 mumps->id.size_schur = size; 2310 mumps->id.schur_lld = size; 2311 PetscCall(MatDenseRestoreArrayRead(F->schur,&arr)); 2312 if (mumps->sym == 1) { 2313 PetscCall(MatSetOption(F->schur,MAT_SPD,PETSC_TRUE)); 2314 } 2315 2316 /* MUMPS expects Fortran style indices */ 2317 PetscCall(PetscFree(mumps->id.listvar_schur)); 2318 PetscCall(PetscMalloc1(size,&mumps->id.listvar_schur)); 2319 PetscCall(ISGetIndices(is,&idxs)); 2320 for (i=0; i<size; i++) PetscCall(PetscMUMPSIntCast(idxs[i]+1,&(mumps->id.listvar_schur[i]))); 2321 PetscCall(ISRestoreIndices(is,&idxs)); 2322 if (mumps->petsc_size > 1) { 2323 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2324 } else { 2325 if (F->factortype == MAT_FACTOR_LU) { 2326 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2327 } else { 2328 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2329 } 2330 } 2331 /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 2332 mumps->id.ICNTL(26) = -1; 2333 PetscFunctionReturn(0); 2334 } 2335 2336 /* -------------------------------------------------------------------------------------------*/ 2337 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S) 2338 { 2339 Mat St; 2340 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2341 PetscScalar *array; 2342 #if defined(PETSC_USE_COMPLEX) 2343 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 2344 #endif 2345 2346 PetscFunctionBegin; 2347 PetscCheck(mumps->id.ICNTL(19),PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! 
You should call MatFactorSetSchurIS to enable it"); 2348 PetscCall(MatCreate(PETSC_COMM_SELF,&St)); 2349 PetscCall(MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur)); 2350 PetscCall(MatSetType(St,MATDENSE)); 2351 PetscCall(MatSetUp(St)); 2352 PetscCall(MatDenseGetArray(St,&array)); 2353 if (!mumps->sym) { /* MUMPS always return a full matrix */ 2354 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2355 PetscInt i,j,N=mumps->id.size_schur; 2356 for (i=0;i<N;i++) { 2357 for (j=0;j<N;j++) { 2358 #if !defined(PETSC_USE_COMPLEX) 2359 PetscScalar val = mumps->id.schur[i*N+j]; 2360 #else 2361 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2362 #endif 2363 array[j*N+i] = val; 2364 } 2365 } 2366 } else { /* stored by columns */ 2367 PetscCall(PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur)); 2368 } 2369 } else { /* either full or lower-triangular (not packed) */ 2370 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2371 PetscInt i,j,N=mumps->id.size_schur; 2372 for (i=0;i<N;i++) { 2373 for (j=i;j<N;j++) { 2374 #if !defined(PETSC_USE_COMPLEX) 2375 PetscScalar val = mumps->id.schur[i*N+j]; 2376 #else 2377 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2378 #endif 2379 array[i*N+j] = val; 2380 array[j*N+i] = val; 2381 } 2382 } 2383 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2384 PetscCall(PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur)); 2385 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2386 PetscInt i,j,N=mumps->id.size_schur; 2387 for (i=0;i<N;i++) { 2388 for (j=0;j<i+1;j++) { 2389 #if !defined(PETSC_USE_COMPLEX) 2390 PetscScalar val = mumps->id.schur[i*N+j]; 2391 #else 2392 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2393 #endif 2394 array[i*N+j] = val; 2395 array[j*N+i] = val; 2396 } 2397 } 2398 } 2399 } 2400 PetscCall(MatDenseRestoreArray(St,&array)); 2401 *S = St; 2402 PetscFunctionReturn(0); 2403 } 2404 2405 /* -------------------------------------------------------------------------------------------*/ 2406 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival) 2407 { 2408 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2409 2410 PetscFunctionBegin; 2411 PetscCall(PetscMUMPSIntCast(ival,&mumps->id.ICNTL(icntl))); 2412 PetscFunctionReturn(0); 2413 } 2414 2415 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival) 2416 { 2417 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2418 2419 PetscFunctionBegin; 2420 *ival = mumps->id.ICNTL(icntl); 2421 PetscFunctionReturn(0); 2422 } 2423 2424 /*@ 2425 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2426 2427 Logically Collective on Mat 2428 2429 Input Parameters: 2430 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2431 . icntl - index of MUMPS parameter array ICNTL() 2432 - ival - value of MUMPS ICNTL(icntl) 2433 2434 Options Database: 2435 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival 2436 2437 Level: beginner 2438 2439 References: 2440 . 
* - MUMPS Users' Guide 2441 2442 .seealso: `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2443 @*/ 2444 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival) 2445 { 2446 PetscFunctionBegin; 2447 PetscValidType(F,1); 2448 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2449 PetscValidLogicalCollectiveInt(F,icntl,2); 2450 PetscValidLogicalCollectiveInt(F,ival,3); 2451 PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival)); 2452 PetscFunctionReturn(0); 2453 } 2454 2455 /*@ 2456 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2457 2458 Logically Collective on Mat 2459 2460 Input Parameters: 2461 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2462 - icntl - index of MUMPS parameter array ICNTL() 2463 2464 Output Parameter: 2465 . ival - value of MUMPS ICNTL(icntl) 2466 2467 Level: beginner 2468 2469 References: 2470 . * - MUMPS Users' Guide 2471 2472 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2473 @*/ 2474 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival) 2475 { 2476 PetscFunctionBegin; 2477 PetscValidType(F,1); 2478 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2479 PetscValidLogicalCollectiveInt(F,icntl,2); 2480 PetscValidIntPointer(ival,3); 2481 PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival)); 2482 PetscFunctionReturn(0); 2483 } 2484 2485 /* -------------------------------------------------------------------------------------------*/ 2486 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val) 2487 { 2488 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2489 2490 PetscFunctionBegin; 2491 mumps->id.CNTL(icntl) = val; 2492 PetscFunctionReturn(0); 2493 } 2494 2495 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val) 2496 { 2497 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2498 2499 PetscFunctionBegin; 2500 *val = mumps->id.CNTL(icntl); 2501 PetscFunctionReturn(0); 2502 } 2503 2504 /*@ 2505 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2506 2507 Logically Collective on Mat 2508 2509 Input Parameters: 2510 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2511 . icntl - index of MUMPS parameter array CNTL() 2512 - val - value of MUMPS CNTL(icntl) 2513 2514 Options Database: 2515 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival 2516 2517 Level: beginner 2518 2519 References: 2520 . 
* - MUMPS Users' Guide 2521 2522 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2523 @*/ 2524 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val) 2525 { 2526 PetscFunctionBegin; 2527 PetscValidType(F,1); 2528 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2529 PetscValidLogicalCollectiveInt(F,icntl,2); 2530 PetscValidLogicalCollectiveReal(F,val,3); 2531 PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val)); 2532 PetscFunctionReturn(0); 2533 } 2534 2535 /*@ 2536 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2537 2538 Logically Collective on Mat 2539 2540 Input Parameters: 2541 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2542 - icntl - index of MUMPS parameter array CNTL() 2543 2544 Output Parameter: 2545 . val - value of MUMPS CNTL(icntl) 2546 2547 Level: beginner 2548 2549 References: 2550 . * - MUMPS Users' Guide 2551 2552 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2553 @*/ 2554 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val) 2555 { 2556 PetscFunctionBegin; 2557 PetscValidType(F,1); 2558 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2559 PetscValidLogicalCollectiveInt(F,icntl,2); 2560 PetscValidRealPointer(val,3); 2561 PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val)); 2562 PetscFunctionReturn(0); 2563 } 2564 2565 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info) 2566 { 2567 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2568 2569 PetscFunctionBegin; 2570 *info = mumps->id.INFO(icntl); 2571 PetscFunctionReturn(0); 2572 } 2573 2574 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog) 2575 { 2576 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2577 2578 PetscFunctionBegin; 2579 *infog = mumps->id.INFOG(icntl); 2580 PetscFunctionReturn(0); 2581 } 2582 2583 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo) 2584 { 2585 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2586 2587 PetscFunctionBegin; 2588 *rinfo = mumps->id.RINFO(icntl); 2589 PetscFunctionReturn(0); 2590 } 2591 2592 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog) 2593 { 2594 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2595 2596 PetscFunctionBegin; 2597 *rinfog = mumps->id.RINFOG(icntl); 2598 PetscFunctionReturn(0); 2599 } 2600 2601 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS) 2602 { 2603 Mat Bt = NULL,Btseq = NULL; 2604 PetscBool flg; 2605 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2606 PetscScalar *aa; 2607 PetscInt spnr,*ia,*ja,M,nrhs; 2608 2609 PetscFunctionBegin; 2610 PetscValidPointer(spRHS,2); 2611 PetscCall(PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg)); 2612 if (flg) { 2613 PetscCall(MatTransposeGetMat(spRHS,&Bt)); 2614 } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be type MATTRANSPOSEMAT matrix"); 2615 2616 PetscCall(MatMumpsSetIcntl(F,30,1)); 2617 2618 if (mumps->petsc_size > 1) { 2619 Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data; 2620 Btseq = b->A; 2621 } else { 2622 Btseq = Bt; 2623 } 2624 2625 PetscCall(MatGetSize(spRHS,&M,&nrhs)); 2626 mumps->id.nrhs = nrhs; 2627 
mumps->id.lrhs = M; 2628 mumps->id.rhs = NULL; 2629 2630 if (!mumps->myid) { 2631 PetscCall(MatSeqAIJGetArray(Btseq,&aa)); 2632 PetscCall(MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg)); 2633 PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2634 PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs)); 2635 mumps->id.rhs_sparse = (MumpsScalar*)aa; 2636 } else { 2637 mumps->id.irhs_ptr = NULL; 2638 mumps->id.irhs_sparse = NULL; 2639 mumps->id.nz_rhs = 0; 2640 mumps->id.rhs_sparse = NULL; 2641 } 2642 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2643 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2644 2645 /* solve phase */ 2646 /*-------------*/ 2647 mumps->id.job = JOB_SOLVE; 2648 PetscMUMPS_c(mumps); 2649 if (mumps->id.INFOG(1) < 0) 2650 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d",mumps->id.INFOG(1),mumps->id.INFO(2)); 2651 2652 if (!mumps->myid) { 2653 PetscCall(MatSeqAIJRestoreArray(Btseq,&aa)); 2654 PetscCall(MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg)); 2655 PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2656 } 2657 PetscFunctionReturn(0); 2658 } 2659 2660 /*@ 2661 MatMumpsGetInverse - Get user-specified set of entries in inverse of A 2662 2663 Logically Collective on Mat 2664 2665 Input Parameters: 2666 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2667 - spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0] 2668 2669 Output Parameter: 2670 . spRHS - requested entries of inverse of A 2671 2672 Level: beginner 2673 2674 References: 2675 . * - MUMPS Users' Guide 2676 2677 .seealso: `MatGetFactor()`, `MatCreateTranspose()` 2678 @*/ 2679 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS) 2680 { 2681 PetscFunctionBegin; 2682 PetscValidType(F,1); 2683 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2684 PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS)); 2685 PetscFunctionReturn(0); 2686 } 2687 2688 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST) 2689 { 2690 Mat spRHS; 2691 2692 PetscFunctionBegin; 2693 PetscCall(MatCreateTranspose(spRHST,&spRHS)); 2694 PetscCall(MatMumpsGetInverse_MUMPS(F,spRHS)); 2695 PetscCall(MatDestroy(&spRHS)); 2696 PetscFunctionReturn(0); 2697 } 2698 2699 /*@ 2700 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T 2701 2702 Logically Collective on Mat 2703 2704 Input Parameters: 2705 + F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface 2706 - spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0] 2707 2708 Output Parameter: 2709 . spRHST - requested entries of inverse of A^T 2710 2711 Level: beginner 2712 2713 References: 2714 . 
* - MUMPS Users' Guide 2715 2716 .seealso: `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 2717 @*/ 2718 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST) 2719 { 2720 PetscBool flg; 2721 2722 PetscFunctionBegin; 2723 PetscValidType(F,1); 2724 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2725 PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL)); 2726 PetscCheck(flg,PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix"); 2727 2728 PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST)); 2729 PetscFunctionReturn(0); 2730 } 2731 2732 /*@ 2733 MatMumpsGetInfo - Get MUMPS parameter INFO() 2734 2735 Logically Collective on Mat 2736 2737 Input Parameters: 2738 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2739 - icntl - index of MUMPS parameter array INFO() 2740 2741 Output Parameter: 2742 . ival - value of MUMPS INFO(icntl) 2743 2744 Level: beginner 2745 2746 References: 2747 . * - MUMPS Users' Guide 2748 2749 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2750 @*/ 2751 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival) 2752 { 2753 PetscFunctionBegin; 2754 PetscValidType(F,1); 2755 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2756 PetscValidIntPointer(ival,3); 2757 PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival)); 2758 PetscFunctionReturn(0); 2759 } 2760 2761 /*@ 2762 MatMumpsGetInfog - Get MUMPS parameter INFOG() 2763 2764 Logically Collective on Mat 2765 2766 Input Parameters: 2767 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2768 - icntl - index of MUMPS parameter array INFOG() 2769 2770 Output Parameter: 2771 . ival - value of MUMPS INFOG(icntl) 2772 2773 Level: beginner 2774 2775 References: 2776 . * - MUMPS Users' Guide 2777 2778 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 2779 @*/ 2780 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival) 2781 { 2782 PetscFunctionBegin; 2783 PetscValidType(F,1); 2784 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2785 PetscValidIntPointer(ival,3); 2786 PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival)); 2787 PetscFunctionReturn(0); 2788 } 2789 2790 /*@ 2791 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 2792 2793 Logically Collective on Mat 2794 2795 Input Parameters: 2796 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2797 - icntl - index of MUMPS parameter array RINFO() 2798 2799 Output Parameter: 2800 . val - value of MUMPS RINFO(icntl) 2801 2802 Level: beginner 2803 2804 References: 2805 . 
* - MUMPS Users' Guide 2806 2807 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()` 2808 @*/ 2809 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val) 2810 { 2811 PetscFunctionBegin; 2812 PetscValidType(F,1); 2813 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2814 PetscValidRealPointer(val,3); 2815 PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val)); 2816 PetscFunctionReturn(0); 2817 } 2818 2819 /*@ 2820 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 2821 2822 Logically Collective on Mat 2823 2824 Input Parameters: 2825 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2826 - icntl - index of MUMPS parameter array RINFOG() 2827 2828 Output Parameter: 2829 . val - value of MUMPS RINFOG(icntl) 2830 2831 Level: beginner 2832 2833 References: 2834 . * - MUMPS Users' Guide 2835 2836 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 2837 @*/ 2838 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val) 2839 { 2840 PetscFunctionBegin; 2841 PetscValidType(F,1); 2842 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2843 PetscValidRealPointer(val,3); 2844 PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val)); 2845 PetscFunctionReturn(0); 2846 } 2847 2848 /*MC 2849 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 2850 distributed and sequential matrices via the external package MUMPS. 2851 2852 Works with MATAIJ and MATSBAIJ matrices 2853 2854 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 2855 2856 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below. 2857 2858 Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver 2859 2860 Options Database Keys: 2861 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 2862 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 2863 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 2864 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 2865 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 2866 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 2867 Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 2868 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 2869 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 2870 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 2871 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 2872 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 2873 . 
-mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space
. -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format
. -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement
. -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
. -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
. -mat_mumps_icntl_23 - ICNTL(23): maximum size of the working memory (MB) that MUMPS can allocate per processor
. -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
. -mat_mumps_icntl_25 - ICNTL(25): computes a solution of a deficient matrix and a null space basis
. -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed
. -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
. -mat_mumps_icntl_29 - ICNTL(29): parallel ordering: 1 = ptscotch, 2 = parmetis
. -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
. -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
. -mat_mumps_icntl_33 - ICNTL(33): compute determinant
. -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
. -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
. -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
. -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold
. -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement
. -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
. -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
. -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
. -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
- -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) were called before calling MUMPS.
                      If m is not given, the default is typically the number of cores per CPU socket (package), as reported by hwloc and suggested by the MUMPS manual.

  Level: beginner

  Notes:
  MUMPS Cholesky does not handle (complex) Hermitian matrices (see the MUMPS Users' Guide, http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf), so using it with a Hermitian matrix generates an error.

  When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
  `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.

  When a MUMPS factorization fails inside a KSP solve, for example with KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling
$          KSPGetPC(ksp,&pc);
$          PCFactorGetMatrix(pc,&mat);
$          MatMumpsGetInfo(mat,....);
$          MatMumpsGetInfog(mat,....); etc.
  Alternatively, run with -ksp_error_if_not_converged; the program then stops and the MUMPS information is printed in the error message.
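
  When the factorization is used inside a `KSP`/`PC` solve, the MUMPS control parameters can also be set programmatically before the solve.
  A minimal sketch (ksp, pc, F, A, b and x are placeholder objects that are assumed to exist; error checking is omitted):
$          KSPSetOperators(ksp,A,A);
$          KSPGetPC(ksp,&pc);
$          PCSetType(pc,PCLU);
$          PCFactorSetMatSolverType(pc,MATSOLVERMUMPS);
$          PCFactorSetUpMatSolverType(pc);
$          PCFactorGetMatrix(pc,&F);
$          MatMumpsSetIcntl(F,14,30);
$          KSPSolve(ksp,b,x);
  Here PCFactorSetUpMatSolverType() creates the factor matrix so that MatMumpsSetIcntl() and MatMumpsSetCntl() can be called on it before the solve.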

  Using MUMPS with 64-bit integers

  MUMPS provides 64-bit integer support in two build modes:
  full 64-bit: here MUMPS is built with the C preprocessing flag -DINTSIZE64 and the Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
  requires all dependent libraries (MPI, ScaLAPACK, LAPACK and BLAS) to be built the same way with 64-bit integers (for example, ILP64 Intel MKL and MPI).

  selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In the compressed sparse row (CSR) storage of matrices,
  MUMPS stores column indices in 32-bit but row offsets in 64-bit, so you can have a huge number of nonzeros but must have fewer than 2^31 rows and
  columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. It requires a regular (32-bit
  integer) build of all dependent libraries (MPI, ScaLAPACK, LAPACK and BLAS).

  With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which works with both the --with-64-bit-indices=0 and --with-64-bit-indices=1 variants of PETSc.

  Two modes to run MUMPS/PETSc with OpenMP
$     Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
$     threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".

$     -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
$     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"

  To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS) while still running the non-MUMPS part
  (i.e., the PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc
  (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
  libraries, to really get performance you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
  (PETSc will automatically try to use a threaded BLAS if --with-openmp is provided).

  If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain the MPI
  processes on each compute node. Listing the processes in ascending rank order, we split the processes on a node into consecutive groups of
  size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and the others in the omp_comm
  are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slave ranks are not. We free the CPUs assigned to slaves (these assignments may have been made
  by CPU binding policies in job scripts) and make them available to the master rank so that the OpenMP threads spawned by MUMPS can run on them.
  On a multi-socket compute node, MPI rank mapping matters. Continuing the example above, suppose each compute node has two sockets:
  if you interleave MPI ranks across the two sockets (even ranks on socket 0, odd ranks on socket 1) and bind
  MPI ranks to cores, then with -mat_mumps_use_omp_threads 16 a master rank (and the threads it spawns) will use half of the cores in socket 0 and half
  of the cores in socket 1, which hurts locality.
On the other hand, if you map MPI ranks consecutively on the two sockets, then the 2948 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 2949 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 2950 examine the mapping result. 2951 2952 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts, 2953 for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc 2954 calls omp_set_num_threads(m) internally before calling MUMPS. 2955 2956 References: 2957 + * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 2958 - * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017. 2959 2960 .seealso: `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()` 2961 2962 M*/ 2963 2964 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type) 2965 { 2966 PetscFunctionBegin; 2967 *type = MATSOLVERMUMPS; 2968 PetscFunctionReturn(0); 2969 } 2970 2971 /* MatGetFactor for Seq and MPI AIJ matrices */ 2972 static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F) 2973 { 2974 Mat B; 2975 Mat_MUMPS *mumps; 2976 PetscBool isSeqAIJ; 2977 PetscMPIInt size; 2978 2979 PetscFunctionBegin; 2980 #if defined(PETSC_USE_COMPLEX) 2981 PetscCheck(!A->hermitian || A->symmetric || ftype != MAT_FACTOR_CHOLESKY,PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported"); 2982 #endif 2983 /* Create the factorization matrix */ 2984 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ)); 2985 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 2986 PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 2987 PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name)); 2988 PetscCall(MatSetUp(B)); 2989 2990 PetscCall(PetscNewLog(B,&mumps)); 2991 2992 B->ops->view = MatView_MUMPS; 2993 B->ops->getinfo = MatGetInfo_MUMPS; 2994 2995 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps)); 2996 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS)); 2997 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS)); 2998 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS)); 2999 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS)); 3000 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS)); 3001 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS)); 3002 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS)); 3003 
PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS)); 3004 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS)); 3005 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS)); 3006 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS)); 3007 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS)); 3008 3009 if (ftype == MAT_FACTOR_LU) { 3010 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3011 B->factortype = MAT_FACTOR_LU; 3012 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3013 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 3014 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU])); 3015 mumps->sym = 0; 3016 } else { 3017 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3018 B->factortype = MAT_FACTOR_CHOLESKY; 3019 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3020 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 3021 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3022 #if defined(PETSC_USE_COMPLEX) 3023 mumps->sym = 2; 3024 #else 3025 if (A->spd_set && A->spd) mumps->sym = 1; 3026 else mumps->sym = 2; 3027 #endif 3028 } 3029 3030 /* set solvertype */ 3031 PetscCall(PetscFree(B->solvertype)); 3032 PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype)); 3033 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 3034 if (size == 1) { 3035 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3036 B->canuseordering = PETSC_TRUE; 3037 } 3038 B->ops->destroy = MatDestroy_MUMPS; 3039 B->data = (void*)mumps; 3040 3041 PetscCall(MatSetFromOptions_MUMPS_OpenMP(B,A)); 3042 3043 *F = B; 3044 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3045 PetscFunctionReturn(0); 3046 } 3047 3048 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3049 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F) 3050 { 3051 Mat B; 3052 Mat_MUMPS *mumps; 3053 PetscBool isSeqSBAIJ; 3054 PetscMPIInt size; 3055 3056 PetscFunctionBegin; 3057 #if defined(PETSC_USE_COMPLEX) 3058 PetscCheck(!A->hermitian || A->symmetric,PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported"); 3059 #endif 3060 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 3061 PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 3062 PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name)); 3063 PetscCall(MatSetUp(B)); 3064 3065 PetscCall(PetscNewLog(B,&mumps)); 3066 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ)); 3067 if (isSeqSBAIJ) { 3068 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3069 } else { 3070 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3071 } 3072 3073 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3074 B->ops->view = MatView_MUMPS; 3075 B->ops->getinfo = MatGetInfo_MUMPS; 3076 3077 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps)); 3078 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS)); 3079 
PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS)); 3080 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS)); 3081 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS)); 3082 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS)); 3083 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS)); 3084 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS)); 3085 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS)); 3086 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS)); 3087 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS)); 3088 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS)); 3089 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS)); 3090 3091 B->factortype = MAT_FACTOR_CHOLESKY; 3092 #if defined(PETSC_USE_COMPLEX) 3093 mumps->sym = 2; 3094 #else 3095 if (A->spd_set && A->spd) mumps->sym = 1; 3096 else mumps->sym = 2; 3097 #endif 3098 3099 /* set solvertype */ 3100 PetscCall(PetscFree(B->solvertype)); 3101 PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype)); 3102 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 3103 if (size == 1) { 3104 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3105 B->canuseordering = PETSC_TRUE; 3106 } 3107 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3108 B->ops->destroy = MatDestroy_MUMPS; 3109 B->data = (void*)mumps; 3110 3111 PetscCall(MatSetFromOptions_MUMPS_OpenMP(B,A)); 3112 3113 *F = B; 3114 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3115 PetscFunctionReturn(0); 3116 } 3117 3118 static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F) 3119 { 3120 Mat B; 3121 Mat_MUMPS *mumps; 3122 PetscBool isSeqBAIJ; 3123 PetscMPIInt size; 3124 3125 PetscFunctionBegin; 3126 /* Create the factorization matrix */ 3127 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ)); 3128 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 3129 PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 3130 PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name)); 3131 PetscCall(MatSetUp(B)); 3132 3133 PetscCall(PetscNewLog(B,&mumps)); 3134 if (ftype == MAT_FACTOR_LU) { 3135 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3136 B->factortype = MAT_FACTOR_LU; 3137 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3138 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3139 mumps->sym = 0; 3140 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU])); 3141 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 3142 3143 B->ops->view = MatView_MUMPS; 3144 B->ops->getinfo = MatGetInfo_MUMPS; 3145 3146 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps)); 3147 
PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS)); 3148 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS)); 3149 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS)); 3150 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS)); 3151 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS)); 3152 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS)); 3153 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS)); 3154 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS)); 3155 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS)); 3156 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS)); 3157 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS)); 3158 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS)); 3159 3160 /* set solvertype */ 3161 PetscCall(PetscFree(B->solvertype)); 3162 PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype)); 3163 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 3164 if (size == 1) { 3165 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3166 B->canuseordering = PETSC_TRUE; 3167 } 3168 B->ops->destroy = MatDestroy_MUMPS; 3169 B->data = (void*)mumps; 3170 3171 PetscCall(MatSetFromOptions_MUMPS_OpenMP(B,A)); 3172 3173 *F = B; 3174 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3175 PetscFunctionReturn(0); 3176 } 3177 3178 /* MatGetFactor for Seq and MPI SELL matrices */ 3179 static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F) 3180 { 3181 Mat B; 3182 Mat_MUMPS *mumps; 3183 PetscBool isSeqSELL; 3184 PetscMPIInt size; 3185 3186 PetscFunctionBegin; 3187 /* Create the factorization matrix */ 3188 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL)); 3189 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 3190 PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 3191 PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name)); 3192 PetscCall(MatSetUp(B)); 3193 3194 PetscCall(PetscNewLog(B,&mumps)); 3195 3196 B->ops->view = MatView_MUMPS; 3197 B->ops->getinfo = MatGetInfo_MUMPS; 3198 3199 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps)); 3200 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS)); 3201 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS)); 3202 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS)); 3203 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS)); 3204 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS)); 3205 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS)); 3206 
PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS)); 3207 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS)); 3208 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS)); 3209 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS)); 3210 3211 if (ftype == MAT_FACTOR_LU) { 3212 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3213 B->factortype = MAT_FACTOR_LU; 3214 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 3215 else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 3216 mumps->sym = 0; 3217 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU])); 3218 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 3219 3220 /* set solvertype */ 3221 PetscCall(PetscFree(B->solvertype)); 3222 PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype)); 3223 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 3224 if (size == 1) { 3225 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3226 B->canuseordering = PETSC_TRUE; 3227 } 3228 B->ops->destroy = MatDestroy_MUMPS; 3229 B->data = (void*)mumps; 3230 3231 PetscCall(MatSetFromOptions_MUMPS_OpenMP(B,A)); 3232 3233 *F = B; 3234 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3235 PetscFunctionReturn(0); 3236 } 3237 3238 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 3239 { 3240 PetscFunctionBegin; 3241 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps)); 3242 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps)); 3243 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps)); 3244 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps)); 3245 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps)); 3246 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps)); 3247 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps)); 3248 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps)); 3249 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps)); 3250 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps)); 3251 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps)); 3252 PetscFunctionReturn(0); 3253 } 3254
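
/*
   Example usage (a sketch, not taken from a PETSc example; "ex1" is a placeholder executable): once the MUMPS solver package
   has been registered above, a MUMPS factorization can be selected at run time with, e.g.,
       ./ex1 -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_4 2
   or requested directly in code with MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F).
*/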