1 2 /* 3 Provides an interface to the MUMPS sparse solver 4 */ 5 #include <petscpkg_version.h> 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 8 #include <../src/mat/impls/sell/mpi/mpisell.h> 9 10 EXTERN_C_BEGIN 11 #if defined(PETSC_USE_COMPLEX) 12 #if defined(PETSC_USE_REAL_SINGLE) 13 #include <cmumps_c.h> 14 #else 15 #include <zmumps_c.h> 16 #endif 17 #else 18 #if defined(PETSC_USE_REAL_SINGLE) 19 #include <smumps_c.h> 20 #else 21 #include <dmumps_c.h> 22 #endif 23 #endif 24 EXTERN_C_END 25 #define JOB_INIT -1 26 #define JOB_FACTSYMBOLIC 1 27 #define JOB_FACTNUMERIC 2 28 #define JOB_SOLVE 3 29 #define JOB_END -2 30 31 /* calls to MUMPS */ 32 #if defined(PETSC_USE_COMPLEX) 33 #if defined(PETSC_USE_REAL_SINGLE) 34 #define MUMPS_c cmumps_c 35 #else 36 #define MUMPS_c zmumps_c 37 #endif 38 #else 39 #if defined(PETSC_USE_REAL_SINGLE) 40 #define MUMPS_c smumps_c 41 #else 42 #define MUMPS_c dmumps_c 43 #endif 44 #endif 45 46 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for 47 number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the 48 naming convention in PetscMPIInt, PetscBLASInt etc. 49 */ 50 typedef MUMPS_INT PetscMUMPSInt; 51 52 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0) 53 #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */ 54 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 55 #endif 56 #else 57 #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */ 58 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 59 #endif 60 #endif 61 62 #define MPIU_MUMPSINT MPI_INT 63 #define PETSC_MUMPS_INT_MAX 2147483647 64 #define PETSC_MUMPS_INT_MIN -2147483648 65 66 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers*/ 67 static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a,PetscMUMPSInt *b) 68 { 69 PetscFunctionBegin; 70 if (PetscDefined(USE_64BIT_INDICES)) { 71 PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt"); 72 } 73 *b = (PetscMUMPSInt)(a); 74 PetscFunctionReturn(0); 75 } 76 77 /* Put these utility routines here since they are only used in this file */ 78 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject,const char opt[],const char text[],const char man[],PetscMUMPSInt currentvalue,PetscMUMPSInt *value,PetscBool *set,PetscMUMPSInt lb,PetscMUMPSInt ub) 79 { 80 PetscInt myval; 81 PetscBool myset; 82 PetscFunctionBegin; 83 /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */ 84 CHKERRQ(PetscOptionsInt_Private(PetscOptionsObject,opt,text,man,(PetscInt)currentvalue,&myval,&myset,lb,ub)); 85 if (myset) CHKERRQ(PetscMUMPSIntCast(myval,value)); 86 if (set) *set = myset; 87 PetscFunctionReturn(0); 88 } 89 #define PetscOptionsMUMPSInt(a,b,c,d,e,f) PetscOptionsMUMPSInt_Private(PetscOptionsObject,a,b,c,d,e,f,PETSC_MUMPS_INT_MIN,PETSC_MUMPS_INT_MAX) 90 91 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */ 92 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 93 #define PetscMUMPS_c(mumps) \ 94 do { \ 95 if (mumps->use_petsc_omp_support) { \ 96 if (mumps->is_omp_master) { \ 97 CHKERRQ(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \ 98 MUMPS_c(&mumps->id); \ 99 CHKERRQ(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \ 100 } \ 101 CHKERRQ(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \ 102 /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \ 103 to processes, so we only Bcast info[1], an error code and leave others (since they do not have \ 104 an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \ 105 omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \ 106 */ \ 107 CHKERRMPI(MPI_Bcast(mumps->id.infog, 40,MPIU_MUMPSINT, 0,mumps->omp_comm));\ 108 CHKERRMPI(MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL, 0,mumps->omp_comm));\ 109 CHKERRMPI(MPI_Bcast(mumps->id.info, 1, MPIU_MUMPSINT, 0,mumps->omp_comm));\ 110 } else { \ 111 MUMPS_c(&mumps->id); \ 112 } \ 113 } while (0) 114 #else 115 #define PetscMUMPS_c(mumps) \ 116 do { MUMPS_c(&mumps->id); } while (0) 117 #endif 118 119 /* declare MumpsScalar */ 120 #if defined(PETSC_USE_COMPLEX) 121 #if defined(PETSC_USE_REAL_SINGLE) 122 #define MumpsScalar mumps_complex 123 #else 124 #define MumpsScalar mumps_double_complex 125 #endif 126 #else 127 #define MumpsScalar PetscScalar 128 #endif 129 130 /* macros s.t. indices match MUMPS documentation */ 131 #define ICNTL(I) icntl[(I)-1] 132 #define CNTL(I) cntl[(I)-1] 133 #define INFOG(I) infog[(I)-1] 134 #define INFO(I) info[(I)-1] 135 #define RINFOG(I) rinfog[(I)-1] 136 #define RINFO(I) rinfo[(I)-1] 137 138 typedef struct Mat_MUMPS Mat_MUMPS; 139 struct Mat_MUMPS { 140 #if defined(PETSC_USE_COMPLEX) 141 #if defined(PETSC_USE_REAL_SINGLE) 142 CMUMPS_STRUC_C id; 143 #else 144 ZMUMPS_STRUC_C id; 145 #endif 146 #else 147 #if defined(PETSC_USE_REAL_SINGLE) 148 SMUMPS_STRUC_C id; 149 #else 150 DMUMPS_STRUC_C id; 151 #endif 152 #endif 153 154 MatStructure matstruc; 155 PetscMPIInt myid,petsc_size; 156 PetscMUMPSInt *irn,*jcn; /* the (i,j,v) triplets passed to mumps. */ 157 PetscScalar *val,*val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */ 158 PetscInt64 nnz; /* number of nonzeros. The type is called selective 64-bit in mumps */ 159 PetscMUMPSInt sym; 160 MPI_Comm mumps_comm; 161 PetscMUMPSInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 162 VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 163 PetscMUMPSInt ICNTL20; /* use centralized (0) or distributed (10) dense RHS */ 164 PetscMUMPSInt lrhs_loc,nloc_rhs,*irhs_loc; 165 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 166 PetscInt *rhs_nrow,max_nrhs; 167 PetscMPIInt *rhs_recvcounts,*rhs_disps; 168 PetscScalar *rhs_loc,*rhs_recvbuf; 169 #endif 170 Vec b_seq,x_seq; 171 PetscInt ninfo,*info; /* which INFO to display */ 172 PetscInt sizeredrhs; 173 PetscScalar *schur_sol; 174 PetscInt schur_sizesol; 175 PetscMUMPSInt *ia_alloc,*ja_alloc; /* work arrays used for the CSR struct for sparse rhs */ 176 PetscInt64 cur_ilen,cur_jlen; /* current len of ia_alloc[], ja_alloc[] */ 177 PetscErrorCode (*ConvertToTriples)(Mat,PetscInt,MatReuse,Mat_MUMPS*); 178 179 /* stuff used by petsc/mumps OpenMP support*/ 180 PetscBool use_petsc_omp_support; 181 PetscOmpCtrl omp_ctrl; /* an OpenMP controler that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 182 MPI_Comm petsc_comm,omp_comm; /* petsc_comm is petsc matrix's comm */ 183 PetscInt64 *recvcount; /* a collection of nnz on omp_master */ 184 PetscMPIInt tag,omp_comm_size; 185 PetscBool is_omp_master; /* is this rank the master of omp_comm */ 186 MPI_Request *reqs; 187 }; 188 189 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt. 190 Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices. 191 */ 192 static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps,PetscInt nrow,PetscInt *ia,PetscInt *ja,PetscMUMPSInt **ia_mumps,PetscMUMPSInt **ja_mumps,PetscMUMPSInt *nnz_mumps) 193 { 194 PetscInt nnz=ia[nrow]-1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */ 195 196 PetscFunctionBegin; 197 #if defined(PETSC_USE_64BIT_INDICES) 198 { 199 PetscInt i; 200 if (nrow+1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */ 201 CHKERRQ(PetscFree(mumps->ia_alloc)); 202 CHKERRQ(PetscMalloc1(nrow+1,&mumps->ia_alloc)); 203 mumps->cur_ilen = nrow+1; 204 } 205 if (nnz > mumps->cur_jlen) { 206 CHKERRQ(PetscFree(mumps->ja_alloc)); 207 CHKERRQ(PetscMalloc1(nnz,&mumps->ja_alloc)); 208 mumps->cur_jlen = nnz; 209 } 210 for (i=0; i<nrow+1; i++) CHKERRQ(PetscMUMPSIntCast(ia[i],&(mumps->ia_alloc[i]))); 211 for (i=0; i<nnz; i++) CHKERRQ(PetscMUMPSIntCast(ja[i],&(mumps->ja_alloc[i]))); 212 *ia_mumps = mumps->ia_alloc; 213 *ja_mumps = mumps->ja_alloc; 214 } 215 #else 216 *ia_mumps = ia; 217 *ja_mumps = ja; 218 #endif 219 CHKERRQ(PetscMUMPSIntCast(nnz,nnz_mumps)); 220 PetscFunctionReturn(0); 221 } 222 223 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps) 224 { 225 PetscFunctionBegin; 226 CHKERRQ(PetscFree(mumps->id.listvar_schur)); 227 CHKERRQ(PetscFree(mumps->id.redrhs)); 228 CHKERRQ(PetscFree(mumps->schur_sol)); 229 mumps->id.size_schur = 0; 230 mumps->id.schur_lld = 0; 231 mumps->id.ICNTL(19) = 0; 232 PetscFunctionReturn(0); 233 } 234 235 /* solve with rhs in mumps->id.redrhs and return in the same location */ 236 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 237 { 238 Mat_MUMPS *mumps=(Mat_MUMPS*)F->data; 239 Mat S,B,X; 240 MatFactorSchurStatus schurstatus; 241 PetscInt sizesol; 242 243 PetscFunctionBegin; 244 CHKERRQ(MatFactorFactorizeSchurComplement(F)); 245 CHKERRQ(MatFactorGetSchurComplement(F,&S,&schurstatus)); 246 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B)); 247 CHKERRQ(MatSetType(B,((PetscObject)S)->type_name)); 248 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 249 CHKERRQ(MatBindToCPU(B,S->boundtocpu)); 250 #endif 251 switch (schurstatus) { 252 case MAT_FACTOR_SCHUR_FACTORED: 253 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X)); 254 CHKERRQ(MatSetType(X,((PetscObject)S)->type_name)); 255 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 256 CHKERRQ(MatBindToCPU(X,S->boundtocpu)); 257 #endif 258 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 259 CHKERRQ(MatMatSolveTranspose(S,B,X)); 260 } else { 261 CHKERRQ(MatMatSolve(S,B,X)); 262 } 263 break; 264 case MAT_FACTOR_SCHUR_INVERTED: 265 sizesol = mumps->id.nrhs*mumps->id.size_schur; 266 if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 267 CHKERRQ(PetscFree(mumps->schur_sol)); 268 CHKERRQ(PetscMalloc1(sizesol,&mumps->schur_sol)); 269 mumps->schur_sizesol = sizesol; 270 } 271 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X)); 272 CHKERRQ(MatSetType(X,((PetscObject)S)->type_name)); 273 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 274 CHKERRQ(MatBindToCPU(X,S->boundtocpu)); 275 #endif 276 CHKERRQ(MatProductCreateWithMat(S,B,NULL,X)); 277 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 278 CHKERRQ(MatProductSetType(X,MATPRODUCT_AtB)); 279 } else { 280 CHKERRQ(MatProductSetType(X,MATPRODUCT_AB)); 281 } 282 CHKERRQ(MatProductSetFromOptions(X)); 283 CHKERRQ(MatProductSymbolic(X)); 284 CHKERRQ(MatProductNumeric(X)); 285 286 CHKERRQ(MatCopy(X,B,SAME_NONZERO_PATTERN)); 287 break; 288 default: 289 SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %d",F->schur_status); 290 } 291 CHKERRQ(MatFactorRestoreSchurComplement(F,&S,schurstatus)); 292 CHKERRQ(MatDestroy(&B)); 293 CHKERRQ(MatDestroy(&X)); 294 PetscFunctionReturn(0); 295 } 296 297 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 298 { 299 Mat_MUMPS *mumps=(Mat_MUMPS*)F->data; 300 301 PetscFunctionBegin; 302 if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 303 PetscFunctionReturn(0); 304 } 305 if (!expansion) { /* prepare for the condensation step */ 306 PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur; 307 /* allocate MUMPS internal array to store reduced right-hand sides */ 308 if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 309 CHKERRQ(PetscFree(mumps->id.redrhs)); 310 mumps->id.lredrhs = mumps->id.size_schur; 311 CHKERRQ(PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs)); 312 mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs; 313 } 314 mumps->id.ICNTL(26) = 1; /* condensation phase */ 315 } else { /* prepare for the expansion step */ 316 /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */ 317 CHKERRQ(MatMumpsSolveSchur_Private(F)); 318 mumps->id.ICNTL(26) = 2; /* expansion phase */ 319 PetscMUMPS_c(mumps); 320 PetscCheckFalse(mumps->id.INFOG(1) < 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1)); 321 /* restore defaults */ 322 mumps->id.ICNTL(26) = -1; 323 /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 324 if (mumps->id.nrhs > 1) { 325 CHKERRQ(PetscFree(mumps->id.redrhs)); 326 mumps->id.lredrhs = 0; 327 mumps->sizeredrhs = 0; 328 } 329 } 330 PetscFunctionReturn(0); 331 } 332 333 /* 334 MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz] 335 336 input: 337 A - matrix in aij,baij or sbaij format 338 shift - 0: C style output triple; 1: Fortran style output triple. 339 reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 340 MAT_REUSE_MATRIX: only the values in v array are updated 341 output: 342 nnz - dim of r, c, and v (number of local nonzero entries of A) 343 r, c, v - row and col index, matrix values (matrix triples) 344 345 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 346 freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 347 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 348 349 */ 350 351 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 352 { 353 const PetscScalar *av; 354 const PetscInt *ai,*aj,*ajj,M=A->rmap->n; 355 PetscInt64 nz,rnz,i,j,k; 356 PetscMUMPSInt *row,*col; 357 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 358 359 PetscFunctionBegin; 360 CHKERRQ(MatSeqAIJGetArrayRead(A,&av)); 361 mumps->val = (PetscScalar*)av; 362 if (reuse == MAT_INITIAL_MATRIX) { 363 nz = aa->nz; 364 ai = aa->i; 365 aj = aa->j; 366 CHKERRQ(PetscMalloc2(nz,&row,nz,&col)); 367 for (i=k=0; i<M; i++) { 368 rnz = ai[i+1] - ai[i]; 369 ajj = aj + ai[i]; 370 for (j=0; j<rnz; j++) { 371 CHKERRQ(PetscMUMPSIntCast(i+shift,&row[k])); 372 CHKERRQ(PetscMUMPSIntCast(ajj[j] + shift,&col[k])); 373 k++; 374 } 375 } 376 mumps->irn = row; 377 mumps->jcn = col; 378 mumps->nnz = nz; 379 } 380 CHKERRQ(MatSeqAIJRestoreArrayRead(A,&av)); 381 PetscFunctionReturn(0); 382 } 383 384 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 385 { 386 PetscInt64 nz,i,j,k,r; 387 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 388 PetscMUMPSInt *row,*col; 389 390 PetscFunctionBegin; 391 mumps->val = a->val; 392 if (reuse == MAT_INITIAL_MATRIX) { 393 nz = a->sliidx[a->totalslices]; 394 CHKERRQ(PetscMalloc2(nz,&row,nz,&col)); 395 for (i=k=0; i<a->totalslices; i++) { 396 for (j=a->sliidx[i],r=0; j<a->sliidx[i+1]; j++,r=((r+1)&0x07)) { 397 CHKERRQ(PetscMUMPSIntCast(8*i+r+shift,&row[k++])); 398 } 399 } 400 for (i=0;i<nz;i++) CHKERRQ(PetscMUMPSIntCast(a->colidx[i]+shift,&col[i])); 401 mumps->irn = row; 402 mumps->jcn = col; 403 mumps->nnz = nz; 404 } 405 PetscFunctionReturn(0); 406 } 407 408 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 409 { 410 Mat_SeqBAIJ *aa=(Mat_SeqBAIJ*)A->data; 411 const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2; 412 PetscInt64 M,nz,idx=0,rnz,i,j,k,m; 413 PetscInt bs; 414 PetscMUMPSInt *row,*col; 415 416 PetscFunctionBegin; 417 CHKERRQ(MatGetBlockSize(A,&bs)); 418 M = A->rmap->N/bs; 419 mumps->val = aa->a; 420 if (reuse == MAT_INITIAL_MATRIX) { 421 ai = aa->i; aj = aa->j; 422 nz = bs2*aa->nz; 423 CHKERRQ(PetscMalloc2(nz,&row,nz,&col)); 424 for (i=0; i<M; i++) { 425 ajj = aj + ai[i]; 426 rnz = ai[i+1] - ai[i]; 427 for (k=0; k<rnz; k++) { 428 for (j=0; j<bs; j++) { 429 for (m=0; m<bs; m++) { 430 CHKERRQ(PetscMUMPSIntCast(i*bs + m + shift,&row[idx])); 431 CHKERRQ(PetscMUMPSIntCast(bs*ajj[k] + j + shift,&col[idx])); 432 idx++; 433 } 434 } 435 } 436 } 437 mumps->irn = row; 438 mumps->jcn = col; 439 mumps->nnz = nz; 440 } 441 PetscFunctionReturn(0); 442 } 443 444 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 445 { 446 const PetscInt *ai, *aj,*ajj; 447 PetscInt bs; 448 PetscInt64 nz,rnz,i,j,k,m; 449 PetscMUMPSInt *row,*col; 450 PetscScalar *val; 451 Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)A->data; 452 const PetscInt bs2=aa->bs2,mbs=aa->mbs; 453 #if defined(PETSC_USE_COMPLEX) 454 PetscBool hermitian; 455 #endif 456 457 PetscFunctionBegin; 458 #if defined(PETSC_USE_COMPLEX) 459 CHKERRQ(MatGetOption(A,MAT_HERMITIAN,&hermitian)); 460 PetscCheck(!hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 461 #endif 462 ai = aa->i; 463 aj = aa->j; 464 CHKERRQ(MatGetBlockSize(A,&bs)); 465 if (reuse == MAT_INITIAL_MATRIX) { 466 nz = aa->nz; 467 CHKERRQ(PetscMalloc2(bs2*nz,&row,bs2*nz,&col)); 468 if (bs>1) { 469 CHKERRQ(PetscMalloc1(bs2*nz,&mumps->val_alloc)); 470 mumps->val = mumps->val_alloc; 471 } else { 472 mumps->val = aa->a; 473 } 474 mumps->irn = row; 475 mumps->jcn = col; 476 } else { 477 if (bs == 1) mumps->val = aa->a; 478 row = mumps->irn; 479 col = mumps->jcn; 480 } 481 val = mumps->val; 482 483 nz = 0; 484 if (bs>1) { 485 for (i=0; i<mbs; i++) { 486 rnz = ai[i+1] - ai[i]; 487 ajj = aj + ai[i]; 488 for (j=0; j<rnz; j++) { 489 for (k=0; k<bs; k++) { 490 for (m=0; m<bs; m++) { 491 if (ajj[j]>i || k>=m) { 492 if (reuse == MAT_INITIAL_MATRIX) { 493 CHKERRQ(PetscMUMPSIntCast(i*bs + m + shift,&row[nz])); 494 CHKERRQ(PetscMUMPSIntCast(ajj[j]*bs + k + shift,&col[nz])); 495 } 496 val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs]; 497 } 498 } 499 } 500 } 501 } 502 } else if (reuse == MAT_INITIAL_MATRIX) { 503 for (i=0; i<mbs; i++) { 504 rnz = ai[i+1] - ai[i]; 505 ajj = aj + ai[i]; 506 for (j=0; j<rnz; j++) { 507 CHKERRQ(PetscMUMPSIntCast(i+shift,&row[nz])); 508 CHKERRQ(PetscMUMPSIntCast(ajj[j] + shift,&col[nz])); 509 nz++; 510 } 511 } 512 PetscCheckFalse(nz != aa->nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT,nz,aa->nz); 513 } 514 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 515 PetscFunctionReturn(0); 516 } 517 518 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 519 { 520 const PetscInt *ai,*aj,*ajj,*adiag,M=A->rmap->n; 521 PetscInt64 nz,rnz,i,j; 522 const PetscScalar *av,*v1; 523 PetscScalar *val; 524 PetscMUMPSInt *row,*col; 525 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 526 PetscBool missing; 527 #if defined(PETSC_USE_COMPLEX) 528 PetscBool hermitian; 529 #endif 530 531 PetscFunctionBegin; 532 #if defined(PETSC_USE_COMPLEX) 533 CHKERRQ(MatGetOption(A,MAT_HERMITIAN,&hermitian)); 534 PetscCheck(!hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 535 #endif 536 CHKERRQ(MatSeqAIJGetArrayRead(A,&av)); 537 ai = aa->i; aj = aa->j; 538 adiag = aa->diag; 539 CHKERRQ(MatMissingDiagonal_SeqAIJ(A,&missing,NULL)); 540 if (reuse == MAT_INITIAL_MATRIX) { 541 /* count nz in the upper triangular part of A */ 542 nz = 0; 543 if (missing) { 544 for (i=0; i<M; i++) { 545 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 546 for (j=ai[i];j<ai[i+1];j++) { 547 if (aj[j] < i) continue; 548 nz++; 549 } 550 } else { 551 nz += ai[i+1] - adiag[i]; 552 } 553 } 554 } else { 555 for (i=0; i<M; i++) nz += ai[i+1] - adiag[i]; 556 } 557 CHKERRQ(PetscMalloc2(nz,&row,nz,&col)); 558 CHKERRQ(PetscMalloc1(nz,&val)); 559 mumps->nnz = nz; 560 mumps->irn = row; 561 mumps->jcn = col; 562 mumps->val = mumps->val_alloc = val; 563 564 nz = 0; 565 if (missing) { 566 for (i=0; i<M; i++) { 567 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 568 for (j=ai[i];j<ai[i+1];j++) { 569 if (aj[j] < i) continue; 570 CHKERRQ(PetscMUMPSIntCast(i+shift,&row[nz])); 571 CHKERRQ(PetscMUMPSIntCast(aj[j]+shift,&col[nz])); 572 val[nz] = av[j]; 573 nz++; 574 } 575 } else { 576 rnz = ai[i+1] - adiag[i]; 577 ajj = aj + adiag[i]; 578 v1 = av + adiag[i]; 579 for (j=0; j<rnz; j++) { 580 CHKERRQ(PetscMUMPSIntCast(i+shift,&row[nz])); 581 CHKERRQ(PetscMUMPSIntCast(ajj[j] + shift,&col[nz])); 582 val[nz++] = v1[j]; 583 } 584 } 585 } 586 } else { 587 for (i=0; i<M; i++) { 588 rnz = ai[i+1] - adiag[i]; 589 ajj = aj + adiag[i]; 590 v1 = av + adiag[i]; 591 for (j=0; j<rnz; j++) { 592 CHKERRQ(PetscMUMPSIntCast(i+shift,&row[nz])); 593 CHKERRQ(PetscMUMPSIntCast(ajj[j] + shift,&col[nz])); 594 val[nz++] = v1[j]; 595 } 596 } 597 } 598 } else { 599 nz = 0; 600 val = mumps->val; 601 if (missing) { 602 for (i=0; i <M; i++) { 603 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 604 for (j=ai[i];j<ai[i+1];j++) { 605 if (aj[j] < i) continue; 606 val[nz++] = av[j]; 607 } 608 } else { 609 rnz = ai[i+1] - adiag[i]; 610 v1 = av + adiag[i]; 611 for (j=0; j<rnz; j++) { 612 val[nz++] = v1[j]; 613 } 614 } 615 } 616 } else { 617 for (i=0; i <M; i++) { 618 rnz = ai[i+1] - adiag[i]; 619 v1 = av + adiag[i]; 620 for (j=0; j<rnz; j++) { 621 val[nz++] = v1[j]; 622 } 623 } 624 } 625 } 626 CHKERRQ(MatSeqAIJRestoreArrayRead(A,&av)); 627 PetscFunctionReturn(0); 628 } 629 630 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 631 { 632 const PetscInt *ai,*aj,*bi,*bj,*garray,*ajj,*bjj; 633 PetscInt bs; 634 PetscInt64 rstart,nz,i,j,k,m,jj,irow,countA,countB; 635 PetscMUMPSInt *row,*col; 636 const PetscScalar *av,*bv,*v1,*v2; 637 PetscScalar *val; 638 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data; 639 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)(mat->A)->data; 640 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 641 const PetscInt bs2=aa->bs2,mbs=aa->mbs; 642 #if defined(PETSC_USE_COMPLEX) 643 PetscBool hermitian; 644 #endif 645 646 PetscFunctionBegin; 647 #if defined(PETSC_USE_COMPLEX) 648 CHKERRQ(MatGetOption(A,MAT_HERMITIAN,&hermitian)); 649 PetscCheck(!hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 650 #endif 651 CHKERRQ(MatGetBlockSize(A,&bs)); 652 rstart = A->rmap->rstart; 653 ai = aa->i; 654 aj = aa->j; 655 bi = bb->i; 656 bj = bb->j; 657 av = aa->a; 658 bv = bb->a; 659 660 garray = mat->garray; 661 662 if (reuse == MAT_INITIAL_MATRIX) { 663 nz = (aa->nz+bb->nz)*bs2; /* just a conservative estimate */ 664 CHKERRQ(PetscMalloc2(nz,&row,nz,&col)); 665 CHKERRQ(PetscMalloc1(nz,&val)); 666 /* can not decide the exact mumps->nnz now because of the SBAIJ */ 667 mumps->irn = row; 668 mumps->jcn = col; 669 mumps->val = mumps->val_alloc = val; 670 } else { 671 val = mumps->val; 672 } 673 674 jj = 0; irow = rstart; 675 for (i=0; i<mbs; i++) { 676 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 677 countA = ai[i+1] - ai[i]; 678 countB = bi[i+1] - bi[i]; 679 bjj = bj + bi[i]; 680 v1 = av + ai[i]*bs2; 681 v2 = bv + bi[i]*bs2; 682 683 if (bs>1) { 684 /* A-part */ 685 for (j=0; j<countA; j++) { 686 for (k=0; k<bs; k++) { 687 for (m=0; m<bs; m++) { 688 if (rstart + ajj[j]*bs>irow || k>=m) { 689 if (reuse == MAT_INITIAL_MATRIX) { 690 CHKERRQ(PetscMUMPSIntCast(irow + m + shift,&row[jj])); 691 CHKERRQ(PetscMUMPSIntCast(rstart + ajj[j]*bs + k + shift,&col[jj])); 692 } 693 val[jj++] = v1[j*bs2 + m + k*bs]; 694 } 695 } 696 } 697 } 698 699 /* B-part */ 700 for (j=0; j < countB; j++) { 701 for (k=0; k<bs; k++) { 702 for (m=0; m<bs; m++) { 703 if (reuse == MAT_INITIAL_MATRIX) { 704 CHKERRQ(PetscMUMPSIntCast(irow + m + shift,&row[jj])); 705 CHKERRQ(PetscMUMPSIntCast(garray[bjj[j]]*bs + k + shift,&col[jj])); 706 } 707 val[jj++] = v2[j*bs2 + m + k*bs]; 708 } 709 } 710 } 711 } else { 712 /* A-part */ 713 for (j=0; j<countA; j++) { 714 if (reuse == MAT_INITIAL_MATRIX) { 715 CHKERRQ(PetscMUMPSIntCast(irow + shift,&row[jj])); 716 CHKERRQ(PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj])); 717 } 718 val[jj++] = v1[j]; 719 } 720 721 /* B-part */ 722 for (j=0; j < countB; j++) { 723 if (reuse == MAT_INITIAL_MATRIX) { 724 CHKERRQ(PetscMUMPSIntCast(irow + shift,&row[jj])); 725 CHKERRQ(PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj])); 726 } 727 val[jj++] = v2[j]; 728 } 729 } 730 irow+=bs; 731 } 732 mumps->nnz = jj; 733 PetscFunctionReturn(0); 734 } 735 736 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 737 { 738 const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 739 PetscInt64 rstart,nz,i,j,jj,irow,countA,countB; 740 PetscMUMPSInt *row,*col; 741 const PetscScalar *av, *bv,*v1,*v2; 742 PetscScalar *val; 743 Mat Ad,Ao; 744 Mat_SeqAIJ *aa; 745 Mat_SeqAIJ *bb; 746 747 PetscFunctionBegin; 748 CHKERRQ(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray)); 749 CHKERRQ(MatSeqAIJGetArrayRead(Ad,&av)); 750 CHKERRQ(MatSeqAIJGetArrayRead(Ao,&bv)); 751 752 aa = (Mat_SeqAIJ*)(Ad)->data; 753 bb = (Mat_SeqAIJ*)(Ao)->data; 754 ai = aa->i; 755 aj = aa->j; 756 bi = bb->i; 757 bj = bb->j; 758 759 rstart = A->rmap->rstart; 760 761 if (reuse == MAT_INITIAL_MATRIX) { 762 nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */ 763 CHKERRQ(PetscMalloc2(nz,&row,nz,&col)); 764 CHKERRQ(PetscMalloc1(nz,&val)); 765 mumps->nnz = nz; 766 mumps->irn = row; 767 mumps->jcn = col; 768 mumps->val = mumps->val_alloc = val; 769 } else { 770 val = mumps->val; 771 } 772 773 jj = 0; irow = rstart; 774 for (i=0; i<m; i++) { 775 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 776 countA = ai[i+1] - ai[i]; 777 countB = bi[i+1] - bi[i]; 778 bjj = bj + bi[i]; 779 v1 = av + ai[i]; 780 v2 = bv + bi[i]; 781 782 /* A-part */ 783 for (j=0; j<countA; j++) { 784 if (reuse == MAT_INITIAL_MATRIX) { 785 CHKERRQ(PetscMUMPSIntCast(irow + shift,&row[jj])); 786 CHKERRQ(PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj])); 787 } 788 val[jj++] = v1[j]; 789 } 790 791 /* B-part */ 792 for (j=0; j < countB; j++) { 793 if (reuse == MAT_INITIAL_MATRIX) { 794 CHKERRQ(PetscMUMPSIntCast(irow + shift,&row[jj])); 795 CHKERRQ(PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj])); 796 } 797 val[jj++] = v2[j]; 798 } 799 irow++; 800 } 801 CHKERRQ(MatSeqAIJRestoreArrayRead(Ad,&av)); 802 CHKERRQ(MatSeqAIJRestoreArrayRead(Ao,&bv)); 803 PetscFunctionReturn(0); 804 } 805 806 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 807 { 808 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)A->data; 809 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ*)(mat->A)->data; 810 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 811 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj; 812 const PetscInt *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart; 813 const PetscInt bs2=mat->bs2; 814 PetscInt bs; 815 PetscInt64 nz,i,j,k,n,jj,irow,countA,countB,idx; 816 PetscMUMPSInt *row,*col; 817 const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2; 818 PetscScalar *val; 819 820 PetscFunctionBegin; 821 CHKERRQ(MatGetBlockSize(A,&bs)); 822 if (reuse == MAT_INITIAL_MATRIX) { 823 nz = bs2*(aa->nz + bb->nz); 824 CHKERRQ(PetscMalloc2(nz,&row,nz,&col)); 825 CHKERRQ(PetscMalloc1(nz,&val)); 826 mumps->nnz = nz; 827 mumps->irn = row; 828 mumps->jcn = col; 829 mumps->val = mumps->val_alloc = val; 830 } else { 831 val = mumps->val; 832 } 833 834 jj = 0; irow = rstart; 835 for (i=0; i<mbs; i++) { 836 countA = ai[i+1] - ai[i]; 837 countB = bi[i+1] - bi[i]; 838 ajj = aj + ai[i]; 839 bjj = bj + bi[i]; 840 v1 = av + bs2*ai[i]; 841 v2 = bv + bs2*bi[i]; 842 843 idx = 0; 844 /* A-part */ 845 for (k=0; k<countA; k++) { 846 for (j=0; j<bs; j++) { 847 for (n=0; n<bs; n++) { 848 if (reuse == MAT_INITIAL_MATRIX) { 849 CHKERRQ(PetscMUMPSIntCast(irow + n + shift,&row[jj])); 850 CHKERRQ(PetscMUMPSIntCast(rstart + bs*ajj[k] + j + shift,&col[jj])); 851 } 852 val[jj++] = v1[idx++]; 853 } 854 } 855 } 856 857 idx = 0; 858 /* B-part */ 859 for (k=0; k<countB; k++) { 860 for (j=0; j<bs; j++) { 861 for (n=0; n<bs; n++) { 862 if (reuse == MAT_INITIAL_MATRIX) { 863 CHKERRQ(PetscMUMPSIntCast(irow + n + shift,&row[jj])); 864 CHKERRQ(PetscMUMPSIntCast(bs*garray[bjj[k]] + j + shift,&col[jj])); 865 } 866 val[jj++] = v2[idx++]; 867 } 868 } 869 } 870 irow += bs; 871 } 872 PetscFunctionReturn(0); 873 } 874 875 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 876 { 877 const PetscInt *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 878 PetscInt64 rstart,nz,nza,nzb,i,j,jj,irow,countA,countB; 879 PetscMUMPSInt *row,*col; 880 const PetscScalar *av, *bv,*v1,*v2; 881 PetscScalar *val; 882 Mat Ad,Ao; 883 Mat_SeqAIJ *aa; 884 Mat_SeqAIJ *bb; 885 #if defined(PETSC_USE_COMPLEX) 886 PetscBool hermitian; 887 #endif 888 889 PetscFunctionBegin; 890 #if defined(PETSC_USE_COMPLEX) 891 CHKERRQ(MatGetOption(A,MAT_HERMITIAN,&hermitian)); 892 PetscCheck(!hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 893 #endif 894 CHKERRQ(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray)); 895 CHKERRQ(MatSeqAIJGetArrayRead(Ad,&av)); 896 CHKERRQ(MatSeqAIJGetArrayRead(Ao,&bv)); 897 898 aa = (Mat_SeqAIJ*)(Ad)->data; 899 bb = (Mat_SeqAIJ*)(Ao)->data; 900 ai = aa->i; 901 aj = aa->j; 902 adiag = aa->diag; 903 bi = bb->i; 904 bj = bb->j; 905 906 rstart = A->rmap->rstart; 907 908 if (reuse == MAT_INITIAL_MATRIX) { 909 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 910 nzb = 0; /* num of upper triangular entries in mat->B */ 911 for (i=0; i<m; i++) { 912 nza += (ai[i+1] - adiag[i]); 913 countB = bi[i+1] - bi[i]; 914 bjj = bj + bi[i]; 915 for (j=0; j<countB; j++) { 916 if (garray[bjj[j]] > rstart) nzb++; 917 } 918 } 919 920 nz = nza + nzb; /* total nz of upper triangular part of mat */ 921 CHKERRQ(PetscMalloc2(nz,&row,nz,&col)); 922 CHKERRQ(PetscMalloc1(nz,&val)); 923 mumps->nnz = nz; 924 mumps->irn = row; 925 mumps->jcn = col; 926 mumps->val = mumps->val_alloc = val; 927 } else { 928 val = mumps->val; 929 } 930 931 jj = 0; irow = rstart; 932 for (i=0; i<m; i++) { 933 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 934 v1 = av + adiag[i]; 935 countA = ai[i+1] - adiag[i]; 936 countB = bi[i+1] - bi[i]; 937 bjj = bj + bi[i]; 938 v2 = bv + bi[i]; 939 940 /* A-part */ 941 for (j=0; j<countA; j++) { 942 if (reuse == MAT_INITIAL_MATRIX) { 943 CHKERRQ(PetscMUMPSIntCast(irow + shift,&row[jj])); 944 CHKERRQ(PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj])); 945 } 946 val[jj++] = v1[j]; 947 } 948 949 /* B-part */ 950 for (j=0; j < countB; j++) { 951 if (garray[bjj[j]] > rstart) { 952 if (reuse == MAT_INITIAL_MATRIX) { 953 CHKERRQ(PetscMUMPSIntCast(irow + shift,&row[jj])); 954 CHKERRQ(PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj])); 955 } 956 val[jj++] = v2[j]; 957 } 958 } 959 irow++; 960 } 961 CHKERRQ(MatSeqAIJRestoreArrayRead(Ad,&av)); 962 CHKERRQ(MatSeqAIJRestoreArrayRead(Ao,&bv)); 963 PetscFunctionReturn(0); 964 } 965 966 PetscErrorCode MatDestroy_MUMPS(Mat A) 967 { 968 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 969 970 PetscFunctionBegin; 971 CHKERRQ(PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc)); 972 CHKERRQ(VecScatterDestroy(&mumps->scat_rhs)); 973 CHKERRQ(VecScatterDestroy(&mumps->scat_sol)); 974 CHKERRQ(VecDestroy(&mumps->b_seq)); 975 CHKERRQ(VecDestroy(&mumps->x_seq)); 976 CHKERRQ(PetscFree(mumps->id.perm_in)); 977 CHKERRQ(PetscFree2(mumps->irn,mumps->jcn)); 978 CHKERRQ(PetscFree(mumps->val_alloc)); 979 CHKERRQ(PetscFree(mumps->info)); 980 CHKERRQ(MatMumpsResetSchur_Private(mumps)); 981 mumps->id.job = JOB_END; 982 PetscMUMPS_c(mumps); 983 PetscCheckFalse(mumps->id.INFOG(1) < 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d",mumps->id.INFOG(1)); 984 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 985 if (mumps->use_petsc_omp_support) { 986 CHKERRQ(PetscOmpCtrlDestroy(&mumps->omp_ctrl)); 987 CHKERRQ(PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf)); 988 CHKERRQ(PetscFree3(mumps->rhs_nrow,mumps->rhs_recvcounts,mumps->rhs_disps)); 989 } 990 #endif 991 CHKERRQ(PetscFree(mumps->ia_alloc)); 992 CHKERRQ(PetscFree(mumps->ja_alloc)); 993 CHKERRQ(PetscFree(mumps->recvcount)); 994 CHKERRQ(PetscFree(mumps->reqs)); 995 CHKERRQ(PetscFree(mumps->irhs_loc)); 996 if (mumps->mumps_comm != MPI_COMM_NULL) CHKERRQ(PetscCommRestoreComm(PetscObjectComm((PetscObject)A),&mumps->mumps_comm)); 997 CHKERRQ(PetscFree(A->data)); 998 999 /* clear composed functions */ 1000 CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL)); 1001 CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL)); 1002 CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL)); 1003 CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL)); 1004 CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL)); 1005 CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL)); 1006 CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL)); 1007 CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL)); 1008 CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL)); 1009 CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL)); 1010 CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL)); 1011 CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL)); 1012 CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL)); 1013 PetscFunctionReturn(0); 1014 } 1015 1016 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */ 1017 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A,PetscInt nrhs,const PetscScalar *array) 1018 { 1019 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1020 const PetscMPIInt ompsize=mumps->omp_comm_size; 1021 PetscInt i,m,M,rstart; 1022 1023 PetscFunctionBegin; 1024 CHKERRQ(MatGetSize(A,&M,NULL)); 1025 CHKERRQ(MatGetLocalSize(A,&m,NULL)); 1026 PetscCheckFalse(M > PETSC_MUMPS_INT_MAX,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt"); 1027 if (ompsize == 1) { 1028 if (!mumps->irhs_loc) { 1029 mumps->nloc_rhs = m; 1030 CHKERRQ(PetscMalloc1(m,&mumps->irhs_loc)); 1031 CHKERRQ(MatGetOwnershipRange(A,&rstart,NULL)); 1032 for (i=0; i<m; i++) mumps->irhs_loc[i] = rstart+i+1; /* use 1-based indices */ 1033 } 1034 mumps->id.rhs_loc = (MumpsScalar*)array; 1035 } else { 1036 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1037 const PetscInt *ranges; 1038 PetscMPIInt j,k,sendcount,*petsc_ranks,*omp_ranks; 1039 MPI_Group petsc_group,omp_group; 1040 PetscScalar *recvbuf=NULL; 1041 1042 if (mumps->is_omp_master) { 1043 /* Lazily initialize the omp stuff for distributed rhs */ 1044 if (!mumps->irhs_loc) { 1045 CHKERRQ(PetscMalloc2(ompsize,&omp_ranks,ompsize,&petsc_ranks)); 1046 CHKERRQ(PetscMalloc3(ompsize,&mumps->rhs_nrow,ompsize,&mumps->rhs_recvcounts,ompsize,&mumps->rhs_disps)); 1047 CHKERRMPI(MPI_Comm_group(mumps->petsc_comm,&petsc_group)); 1048 CHKERRMPI(MPI_Comm_group(mumps->omp_comm,&omp_group)); 1049 for (j=0; j<ompsize; j++) omp_ranks[j] = j; 1050 CHKERRMPI(MPI_Group_translate_ranks(omp_group,ompsize,omp_ranks,petsc_group,petsc_ranks)); 1051 1052 /* Populate mumps->irhs_loc[], rhs_nrow[] */ 1053 mumps->nloc_rhs = 0; 1054 CHKERRQ(MatGetOwnershipRanges(A,&ranges)); 1055 for (j=0; j<ompsize; j++) { 1056 mumps->rhs_nrow[j] = ranges[petsc_ranks[j]+1] - ranges[petsc_ranks[j]]; 1057 mumps->nloc_rhs += mumps->rhs_nrow[j]; 1058 } 1059 CHKERRQ(PetscMalloc1(mumps->nloc_rhs,&mumps->irhs_loc)); 1060 for (j=k=0; j<ompsize; j++) { 1061 for (i=ranges[petsc_ranks[j]]; i<ranges[petsc_ranks[j]+1]; i++,k++) mumps->irhs_loc[k] = i+1; /* uses 1-based indices */ 1062 } 1063 1064 CHKERRQ(PetscFree2(omp_ranks,petsc_ranks)); 1065 CHKERRMPI(MPI_Group_free(&petsc_group)); 1066 CHKERRMPI(MPI_Group_free(&omp_group)); 1067 } 1068 1069 /* Realloc buffers when current nrhs is bigger than what we have met */ 1070 if (nrhs > mumps->max_nrhs) { 1071 CHKERRQ(PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf)); 1072 CHKERRQ(PetscMalloc2(mumps->nloc_rhs*nrhs,&mumps->rhs_loc,mumps->nloc_rhs*nrhs,&mumps->rhs_recvbuf)); 1073 mumps->max_nrhs = nrhs; 1074 } 1075 1076 /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */ 1077 for (j=0; j<ompsize; j++) CHKERRQ(PetscMPIIntCast(mumps->rhs_nrow[j]*nrhs,&mumps->rhs_recvcounts[j])); 1078 mumps->rhs_disps[0] = 0; 1079 for (j=1; j<ompsize; j++) { 1080 mumps->rhs_disps[j] = mumps->rhs_disps[j-1] + mumps->rhs_recvcounts[j-1]; 1081 PetscCheckFalse(mumps->rhs_disps[j] < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscMPIInt overflow!"); 1082 } 1083 recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */ 1084 } 1085 1086 CHKERRQ(PetscMPIIntCast(m*nrhs,&sendcount)); 1087 CHKERRMPI(MPI_Gatherv(array,sendcount,MPIU_SCALAR,recvbuf,mumps->rhs_recvcounts,mumps->rhs_disps,MPIU_SCALAR,0,mumps->omp_comm)); 1088 1089 if (mumps->is_omp_master) { 1090 if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */ 1091 PetscScalar *dst,*dstbase = mumps->rhs_loc; 1092 for (j=0; j<ompsize; j++) { 1093 const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j]; 1094 dst = dstbase; 1095 for (i=0; i<nrhs; i++) { 1096 CHKERRQ(PetscArraycpy(dst,src,mumps->rhs_nrow[j])); 1097 src += mumps->rhs_nrow[j]; 1098 dst += mumps->nloc_rhs; 1099 } 1100 dstbase += mumps->rhs_nrow[j]; 1101 } 1102 } 1103 mumps->id.rhs_loc = (MumpsScalar*)mumps->rhs_loc; 1104 } 1105 #endif /* PETSC_HAVE_OPENMP_SUPPORT */ 1106 } 1107 mumps->id.nrhs = nrhs; 1108 mumps->id.nloc_rhs = mumps->nloc_rhs; 1109 mumps->id.lrhs_loc = mumps->nloc_rhs; 1110 mumps->id.irhs_loc = mumps->irhs_loc; 1111 PetscFunctionReturn(0); 1112 } 1113 1114 PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x) 1115 { 1116 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1117 const PetscScalar *rarray = NULL; 1118 PetscScalar *array; 1119 IS is_iden,is_petsc; 1120 PetscInt i; 1121 PetscBool second_solve = PETSC_FALSE; 1122 static PetscBool cite1 = PETSC_FALSE,cite2 = PETSC_FALSE; 1123 1124 PetscFunctionBegin; 1125 CHKERRQ(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1)); 1126 CHKERRQ(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",&cite2)); 1127 1128 if (A->factorerrortype) { 1129 CHKERRQ(PetscInfo(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2))); 1130 CHKERRQ(VecSetInf(x)); 1131 PetscFunctionReturn(0); 1132 } 1133 1134 mumps->id.nrhs = 1; 1135 if (mumps->petsc_size > 1) { 1136 if (mumps->ICNTL20 == 10) { 1137 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1138 CHKERRQ(VecGetArrayRead(b,&rarray)); 1139 CHKERRQ(MatMumpsSetUpDistRHSInfo(A,1,rarray)); 1140 } else { 1141 mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/ 1142 CHKERRQ(VecScatterBegin(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD)); 1143 CHKERRQ(VecScatterEnd(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD)); 1144 if (!mumps->myid) { 1145 CHKERRQ(VecGetArray(mumps->b_seq,&array)); 1146 mumps->id.rhs = (MumpsScalar*)array; 1147 } 1148 } 1149 } else { /* petsc_size == 1 */ 1150 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1151 CHKERRQ(VecCopy(b,x)); 1152 CHKERRQ(VecGetArray(x,&array)); 1153 mumps->id.rhs = (MumpsScalar*)array; 1154 } 1155 1156 /* 1157 handle condensation step of Schur complement (if any) 1158 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 1159 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 1160 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 1161 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 1162 */ 1163 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1164 PetscCheckFalse(mumps->petsc_size > 1,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc"); 1165 second_solve = PETSC_TRUE; 1166 CHKERRQ(MatMumpsHandleSchur_Private(A,PETSC_FALSE)); 1167 } 1168 /* solve phase */ 1169 /*-------------*/ 1170 mumps->id.job = JOB_SOLVE; 1171 PetscMUMPS_c(mumps); 1172 PetscCheckFalse(mumps->id.INFOG(1) < 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1)); 1173 1174 /* handle expansion step of Schur complement (if any) */ 1175 if (second_solve) { 1176 CHKERRQ(MatMumpsHandleSchur_Private(A,PETSC_TRUE)); 1177 } 1178 1179 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 1180 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 1181 /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 1182 CHKERRQ(VecScatterDestroy(&mumps->scat_sol)); 1183 } 1184 if (!mumps->scat_sol) { /* create scatter scat_sol */ 1185 PetscInt *isol2_loc=NULL; 1186 CHKERRQ(ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden)); /* from */ 1187 CHKERRQ(PetscMalloc1(mumps->id.lsol_loc,&isol2_loc)); 1188 for (i=0; i<mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i]-1; /* change Fortran style to C style */ 1189 CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,isol2_loc,PETSC_OWN_POINTER,&is_petsc)); /* to */ 1190 CHKERRQ(VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol)); 1191 CHKERRQ(ISDestroy(&is_iden)); 1192 CHKERRQ(ISDestroy(&is_petsc)); 1193 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 1194 } 1195 1196 CHKERRQ(VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD)); 1197 CHKERRQ(VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD)); 1198 } 1199 1200 if (mumps->petsc_size > 1) { 1201 if (mumps->ICNTL20 == 10) { 1202 CHKERRQ(VecRestoreArrayRead(b,&rarray)); 1203 } else if (!mumps->myid) { 1204 CHKERRQ(VecRestoreArray(mumps->b_seq,&array)); 1205 } 1206 } else CHKERRQ(VecRestoreArray(x,&array)); 1207 1208 CHKERRQ(PetscLogFlops(2.0*mumps->id.RINFO(3))); 1209 PetscFunctionReturn(0); 1210 } 1211 1212 PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x) 1213 { 1214 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1215 1216 PetscFunctionBegin; 1217 mumps->id.ICNTL(9) = 0; 1218 CHKERRQ(MatSolve_MUMPS(A,b,x)); 1219 mumps->id.ICNTL(9) = 1; 1220 PetscFunctionReturn(0); 1221 } 1222 1223 PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X) 1224 { 1225 Mat Bt = NULL; 1226 PetscBool denseX,denseB,flg,flgT; 1227 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1228 PetscInt i,nrhs,M; 1229 PetscScalar *array; 1230 const PetscScalar *rbray; 1231 PetscInt lsol_loc,nlsol_loc,*idxx,iidx = 0; 1232 PetscMUMPSInt *isol_loc,*isol_loc_save; 1233 PetscScalar *bray,*sol_loc,*sol_loc_save; 1234 IS is_to,is_from; 1235 PetscInt k,proc,j,m,myrstart; 1236 const PetscInt *rstart; 1237 Vec v_mpi,msol_loc; 1238 VecScatter scat_sol; 1239 Vec b_seq; 1240 VecScatter scat_rhs; 1241 PetscScalar *aa; 1242 PetscInt spnr,*ia,*ja; 1243 Mat_MPIAIJ *b = NULL; 1244 1245 PetscFunctionBegin; 1246 CHKERRQ(PetscObjectTypeCompareAny((PetscObject)X,&denseX,MATSEQDENSE,MATMPIDENSE,NULL)); 1247 PetscCheck(denseX,PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix"); 1248 1249 CHKERRQ(PetscObjectTypeCompareAny((PetscObject)B,&denseB,MATSEQDENSE,MATMPIDENSE,NULL)); 1250 if (denseB) { 1251 PetscCheckFalse(B->rmap->n != X->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution"); 1252 mumps->id.ICNTL(20)= 0; /* dense RHS */ 1253 } else { /* sparse B */ 1254 PetscCheckFalse(X == B,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_IDN,"X and B must be different matrices"); 1255 CHKERRQ(PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT)); 1256 if (flgT) { /* input B is transpose of actural RHS matrix, 1257 because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */ 1258 CHKERRQ(MatTransposeGetMat(B,&Bt)); 1259 } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix"); 1260 mumps->id.ICNTL(20)= 1; /* sparse RHS */ 1261 } 1262 1263 CHKERRQ(MatGetSize(B,&M,&nrhs)); 1264 mumps->id.nrhs = nrhs; 1265 mumps->id.lrhs = M; 1266 mumps->id.rhs = NULL; 1267 1268 if (mumps->petsc_size == 1) { 1269 PetscScalar *aa; 1270 PetscInt spnr,*ia,*ja; 1271 PetscBool second_solve = PETSC_FALSE; 1272 1273 CHKERRQ(MatDenseGetArray(X,&array)); 1274 mumps->id.rhs = (MumpsScalar*)array; 1275 1276 if (denseB) { 1277 /* copy B to X */ 1278 CHKERRQ(MatDenseGetArrayRead(B,&rbray)); 1279 CHKERRQ(PetscArraycpy(array,rbray,M*nrhs)); 1280 CHKERRQ(MatDenseRestoreArrayRead(B,&rbray)); 1281 } else { /* sparse B */ 1282 CHKERRQ(MatSeqAIJGetArray(Bt,&aa)); 1283 CHKERRQ(MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg)); 1284 PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 1285 CHKERRQ(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs)); 1286 mumps->id.rhs_sparse = (MumpsScalar*)aa; 1287 } 1288 /* handle condensation step of Schur complement (if any) */ 1289 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1290 second_solve = PETSC_TRUE; 1291 CHKERRQ(MatMumpsHandleSchur_Private(A,PETSC_FALSE)); 1292 } 1293 /* solve phase */ 1294 /*-------------*/ 1295 mumps->id.job = JOB_SOLVE; 1296 PetscMUMPS_c(mumps); 1297 PetscCheckFalse(mumps->id.INFOG(1) < 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1)); 1298 1299 /* handle expansion step of Schur complement (if any) */ 1300 if (second_solve) { 1301 CHKERRQ(MatMumpsHandleSchur_Private(A,PETSC_TRUE)); 1302 } 1303 if (!denseB) { /* sparse B */ 1304 CHKERRQ(MatSeqAIJRestoreArray(Bt,&aa)); 1305 CHKERRQ(MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg)); 1306 PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 1307 } 1308 CHKERRQ(MatDenseRestoreArray(X,&array)); 1309 PetscFunctionReturn(0); 1310 } 1311 1312 /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/ 1313 PetscCheckFalse(mumps->petsc_size > 1 && mumps->id.ICNTL(19),PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc"); 1314 1315 /* create msol_loc to hold mumps local solution */ 1316 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1317 sol_loc_save = (PetscScalar*)mumps->id.sol_loc; 1318 1319 lsol_loc = mumps->id.lsol_loc; 1320 nlsol_loc = nrhs*lsol_loc; /* length of sol_loc */ 1321 CHKERRQ(PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc)); 1322 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1323 mumps->id.isol_loc = isol_loc; 1324 1325 CHKERRQ(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc)); 1326 1327 if (denseB) { 1328 if (mumps->ICNTL20 == 10) { 1329 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1330 CHKERRQ(MatDenseGetArrayRead(B,&rbray)); 1331 CHKERRQ(MatMumpsSetUpDistRHSInfo(A,nrhs,rbray)); 1332 CHKERRQ(MatDenseRestoreArrayRead(B,&rbray)); 1333 CHKERRQ(MatGetLocalSize(B,&m,NULL)); 1334 CHKERRQ(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,NULL,&v_mpi)); 1335 } else { 1336 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1337 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1338 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1339 0, re-arrange B into desired order, which is a local operation. 1340 */ 1341 1342 /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 1343 /* wrap dense rhs matrix B into a vector v_mpi */ 1344 CHKERRQ(MatGetLocalSize(B,&m,NULL)); 1345 CHKERRQ(MatDenseGetArray(B,&bray)); 1346 CHKERRQ(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi)); 1347 CHKERRQ(MatDenseRestoreArray(B,&bray)); 1348 1349 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */ 1350 if (!mumps->myid) { 1351 PetscInt *idx; 1352 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1353 CHKERRQ(PetscMalloc1(nrhs*M,&idx)); 1354 CHKERRQ(MatGetOwnershipRanges(B,&rstart)); 1355 k = 0; 1356 for (proc=0; proc<mumps->petsc_size; proc++) { 1357 for (j=0; j<nrhs; j++) { 1358 for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i; 1359 } 1360 } 1361 1362 CHKERRQ(VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq)); 1363 CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to)); 1364 CHKERRQ(ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from)); 1365 } else { 1366 CHKERRQ(VecCreateSeq(PETSC_COMM_SELF,0,&b_seq)); 1367 CHKERRQ(ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to)); 1368 CHKERRQ(ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from)); 1369 } 1370 CHKERRQ(VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs)); 1371 CHKERRQ(VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD)); 1372 CHKERRQ(ISDestroy(&is_to)); 1373 CHKERRQ(ISDestroy(&is_from)); 1374 CHKERRQ(VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD)); 1375 1376 if (!mumps->myid) { /* define rhs on the host */ 1377 CHKERRQ(VecGetArray(b_seq,&bray)); 1378 mumps->id.rhs = (MumpsScalar*)bray; 1379 CHKERRQ(VecRestoreArray(b_seq,&bray)); 1380 } 1381 } 1382 } else { /* sparse B */ 1383 b = (Mat_MPIAIJ*)Bt->data; 1384 1385 /* wrap dense X into a vector v_mpi */ 1386 CHKERRQ(MatGetLocalSize(X,&m,NULL)); 1387 CHKERRQ(MatDenseGetArray(X,&bray)); 1388 CHKERRQ(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi)); 1389 CHKERRQ(MatDenseRestoreArray(X,&bray)); 1390 1391 if (!mumps->myid) { 1392 CHKERRQ(MatSeqAIJGetArray(b->A,&aa)); 1393 CHKERRQ(MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg)); 1394 PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 1395 CHKERRQ(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs)); 1396 mumps->id.rhs_sparse = (MumpsScalar*)aa; 1397 } else { 1398 mumps->id.irhs_ptr = NULL; 1399 mumps->id.irhs_sparse = NULL; 1400 mumps->id.nz_rhs = 0; 1401 mumps->id.rhs_sparse = NULL; 1402 } 1403 } 1404 1405 /* solve phase */ 1406 /*-------------*/ 1407 mumps->id.job = JOB_SOLVE; 1408 PetscMUMPS_c(mumps); 1409 PetscCheckFalse(mumps->id.INFOG(1) < 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1)); 1410 1411 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1412 CHKERRQ(MatDenseGetArray(X,&array)); 1413 CHKERRQ(VecPlaceArray(v_mpi,array)); 1414 1415 /* create scatter scat_sol */ 1416 CHKERRQ(MatGetOwnershipRanges(X,&rstart)); 1417 /* iidx: index for scatter mumps solution to petsc X */ 1418 1419 CHKERRQ(ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from)); 1420 CHKERRQ(PetscMalloc1(nlsol_loc,&idxx)); 1421 for (i=0; i<lsol_loc; i++) { 1422 isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1423 1424 for (proc=0; proc<mumps->petsc_size; proc++) { 1425 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) { 1426 myrstart = rstart[proc]; 1427 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1428 iidx = k + myrstart*nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1429 m = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */ 1430 break; 1431 } 1432 } 1433 1434 for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m; 1435 } 1436 CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to)); 1437 CHKERRQ(VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol)); 1438 CHKERRQ(VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD)); 1439 CHKERRQ(ISDestroy(&is_from)); 1440 CHKERRQ(ISDestroy(&is_to)); 1441 CHKERRQ(VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD)); 1442 CHKERRQ(MatDenseRestoreArray(X,&array)); 1443 1444 /* free spaces */ 1445 mumps->id.sol_loc = (MumpsScalar*)sol_loc_save; 1446 mumps->id.isol_loc = isol_loc_save; 1447 1448 CHKERRQ(PetscFree2(sol_loc,isol_loc)); 1449 CHKERRQ(PetscFree(idxx)); 1450 CHKERRQ(VecDestroy(&msol_loc)); 1451 CHKERRQ(VecDestroy(&v_mpi)); 1452 if (!denseB) { 1453 if (!mumps->myid) { 1454 b = (Mat_MPIAIJ*)Bt->data; 1455 CHKERRQ(MatSeqAIJRestoreArray(b->A,&aa)); 1456 CHKERRQ(MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg)); 1457 PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 1458 } 1459 } else { 1460 if (mumps->ICNTL20 == 0) { 1461 CHKERRQ(VecDestroy(&b_seq)); 1462 CHKERRQ(VecScatterDestroy(&scat_rhs)); 1463 } 1464 } 1465 CHKERRQ(VecScatterDestroy(&scat_sol)); 1466 CHKERRQ(PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3))); 1467 PetscFunctionReturn(0); 1468 } 1469 1470 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X) 1471 { 1472 PetscBool flg; 1473 Mat B; 1474 1475 PetscFunctionBegin; 1476 CHKERRQ(PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL)); 1477 PetscCheck(flg,PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix"); 1478 1479 /* Create B=Bt^T that uses Bt's data structure */ 1480 CHKERRQ(MatCreateTranspose(Bt,&B)); 1481 1482 CHKERRQ(MatMatSolve_MUMPS(A,B,X)); 1483 CHKERRQ(MatDestroy(&B)); 1484 PetscFunctionReturn(0); 1485 } 1486 1487 #if !defined(PETSC_USE_COMPLEX) 1488 /* 1489 input: 1490 F: numeric factor 1491 output: 1492 nneg: total number of negative pivots 1493 nzero: total number of zero pivots 1494 npos: (global dimension of F) - nneg - nzero 1495 */ 1496 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos) 1497 { 1498 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1499 PetscMPIInt size; 1500 1501 PetscFunctionBegin; 1502 CHKERRMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F),&size)); 1503 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1504 PetscCheckFalse(size > 1 && mumps->id.ICNTL(13) != 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia",mumps->id.INFOG(13)); 1505 1506 if (nneg) *nneg = mumps->id.INFOG(12); 1507 if (nzero || npos) { 1508 PetscCheckFalse(mumps->id.ICNTL(24) != 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1509 if (nzero) *nzero = mumps->id.INFOG(28); 1510 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1511 } 1512 PetscFunctionReturn(0); 1513 } 1514 #endif 1515 1516 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps) 1517 { 1518 PetscInt i,nreqs; 1519 PetscMUMPSInt *irn,*jcn; 1520 PetscMPIInt count; 1521 PetscInt64 totnnz,remain; 1522 const PetscInt osize=mumps->omp_comm_size; 1523 PetscScalar *val; 1524 1525 PetscFunctionBegin; 1526 if (osize > 1) { 1527 if (reuse == MAT_INITIAL_MATRIX) { 1528 /* master first gathers counts of nonzeros to receive */ 1529 if (mumps->is_omp_master) CHKERRQ(PetscMalloc1(osize,&mumps->recvcount)); 1530 CHKERRMPI(MPI_Gather(&mumps->nnz,1,MPIU_INT64,mumps->recvcount,1,MPIU_INT64,0/*master*/,mumps->omp_comm)); 1531 1532 /* Then each computes number of send/recvs */ 1533 if (mumps->is_omp_master) { 1534 /* Start from 1 since self communication is not done in MPI */ 1535 nreqs = 0; 1536 for (i=1; i<osize; i++) nreqs += (mumps->recvcount[i]+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX; 1537 } else { 1538 nreqs = (mumps->nnz+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX; 1539 } 1540 CHKERRQ(PetscMalloc1(nreqs*3,&mumps->reqs)); /* Triple the requests since we send irn, jcn and val seperately */ 1541 1542 /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 1543 MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 1544 might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size 1545 is very small, the current approach should have no extra overhead compared to MPI_Gatherv. 1546 */ 1547 nreqs = 0; /* counter for actual send/recvs */ 1548 if (mumps->is_omp_master) { 1549 for (i=0,totnnz=0; i<osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1550 CHKERRQ(PetscMalloc2(totnnz,&irn,totnnz,&jcn)); 1551 CHKERRQ(PetscMalloc1(totnnz,&val)); 1552 1553 /* Self communication */ 1554 CHKERRQ(PetscArraycpy(irn,mumps->irn,mumps->nnz)); 1555 CHKERRQ(PetscArraycpy(jcn,mumps->jcn,mumps->nnz)); 1556 CHKERRQ(PetscArraycpy(val,mumps->val,mumps->nnz)); 1557 1558 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1559 CHKERRQ(PetscFree2(mumps->irn,mumps->jcn)); 1560 CHKERRQ(PetscFree(mumps->val_alloc)); 1561 mumps->nnz = totnnz; 1562 mumps->irn = irn; 1563 mumps->jcn = jcn; 1564 mumps->val = mumps->val_alloc = val; 1565 1566 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1567 jcn += mumps->recvcount[0]; 1568 val += mumps->recvcount[0]; 1569 1570 /* Remote communication */ 1571 for (i=1; i<osize; i++) { 1572 count = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX); 1573 remain = mumps->recvcount[i] - count; 1574 while (count>0) { 1575 CHKERRMPI(MPI_Irecv(irn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1576 CHKERRMPI(MPI_Irecv(jcn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1577 CHKERRMPI(MPI_Irecv(val,count,MPIU_SCALAR, i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1578 irn += count; 1579 jcn += count; 1580 val += count; 1581 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1582 remain -= count; 1583 } 1584 } 1585 } else { 1586 irn = mumps->irn; 1587 jcn = mumps->jcn; 1588 val = mumps->val; 1589 count = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX); 1590 remain = mumps->nnz - count; 1591 while (count>0) { 1592 CHKERRMPI(MPI_Isend(irn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1593 CHKERRMPI(MPI_Isend(jcn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1594 CHKERRMPI(MPI_Isend(val,count,MPIU_SCALAR, 0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1595 irn += count; 1596 jcn += count; 1597 val += count; 1598 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1599 remain -= count; 1600 } 1601 } 1602 } else { 1603 nreqs = 0; 1604 if (mumps->is_omp_master) { 1605 val = mumps->val + mumps->recvcount[0]; 1606 for (i=1; i<osize; i++) { /* Remote communication only since self data is already in place */ 1607 count = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX); 1608 remain = mumps->recvcount[i] - count; 1609 while (count>0) { 1610 CHKERRMPI(MPI_Irecv(val,count,MPIU_SCALAR,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1611 val += count; 1612 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1613 remain -= count; 1614 } 1615 } 1616 } else { 1617 val = mumps->val; 1618 count = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX); 1619 remain = mumps->nnz - count; 1620 while (count>0) { 1621 CHKERRMPI(MPI_Isend(val,count,MPIU_SCALAR,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++])); 1622 val += count; 1623 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1624 remain -= count; 1625 } 1626 } 1627 } 1628 CHKERRMPI(MPI_Waitall(nreqs,mumps->reqs,MPI_STATUSES_IGNORE)); 1629 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1630 } 1631 PetscFunctionReturn(0); 1632 } 1633 1634 PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info) 1635 { 1636 Mat_MUMPS *mumps =(Mat_MUMPS*)(F)->data; 1637 PetscBool isMPIAIJ; 1638 1639 PetscFunctionBegin; 1640 if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) { 1641 if (mumps->id.INFOG(1) == -6) { 1642 CHKERRQ(PetscInfo(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2))); 1643 } 1644 CHKERRQ(PetscInfo(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2))); 1645 PetscFunctionReturn(0); 1646 } 1647 1648 CHKERRQ((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps)); 1649 CHKERRQ(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps)); 1650 1651 /* numerical factorization phase */ 1652 /*-------------------------------*/ 1653 mumps->id.job = JOB_FACTNUMERIC; 1654 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1655 if (!mumps->myid) { 1656 mumps->id.a = (MumpsScalar*)mumps->val; 1657 } 1658 } else { 1659 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1660 } 1661 PetscMUMPS_c(mumps); 1662 if (mumps->id.INFOG(1) < 0) { 1663 if (A->erroriffailure) { 1664 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d",mumps->id.INFOG(1),mumps->id.INFO(2)); 1665 } else { 1666 if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */ 1667 CHKERRQ(PetscInfo(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2))); 1668 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1669 } else if (mumps->id.INFOG(1) == -13) { 1670 CHKERRQ(PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2))); 1671 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1672 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 1673 CHKERRQ(PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray \n",mumps->id.INFOG(1),mumps->id.INFO(2))); 1674 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1675 } else { 1676 CHKERRQ(PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2))); 1677 F->factorerrortype = MAT_FACTOR_OTHER; 1678 } 1679 } 1680 } 1681 PetscCheckFalse(!mumps->myid && mumps->id.ICNTL(16) > 0,PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d",mumps->id.INFOG(16)); 1682 1683 F->assembled = PETSC_TRUE; 1684 1685 if (F->schur) { /* reset Schur status to unfactored */ 1686 #if defined(PETSC_HAVE_CUDA) 1687 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 1688 #endif 1689 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1690 mumps->id.ICNTL(19) = 2; 1691 CHKERRQ(MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur)); 1692 } 1693 CHKERRQ(MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED)); 1694 } 1695 1696 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 1697 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 1698 1699 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 1700 if (mumps->petsc_size > 1) { 1701 PetscInt lsol_loc; 1702 PetscScalar *sol_loc; 1703 1704 CHKERRQ(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ)); 1705 1706 /* distributed solution; Create x_seq=sol_loc for repeated use */ 1707 if (mumps->x_seq) { 1708 CHKERRQ(VecScatterDestroy(&mumps->scat_sol)); 1709 CHKERRQ(PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc)); 1710 CHKERRQ(VecDestroy(&mumps->x_seq)); 1711 } 1712 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1713 CHKERRQ(PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc)); 1714 mumps->id.lsol_loc = lsol_loc; 1715 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1716 CHKERRQ(VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq)); 1717 } 1718 CHKERRQ(PetscLogFlops(mumps->id.RINFO(2))); 1719 PetscFunctionReturn(0); 1720 } 1721 1722 /* Sets MUMPS options from the options database */ 1723 PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A) 1724 { 1725 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1726 PetscErrorCode ierr; 1727 PetscMUMPSInt icntl=0; 1728 PetscInt info[80],i,ninfo=80; 1729 PetscBool flg=PETSC_FALSE; 1730 1731 PetscFunctionBegin; 1732 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 1733 CHKERRQ(PetscOptionsMUMPSInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg)); 1734 if (flg) mumps->id.ICNTL(1) = icntl; 1735 CHKERRQ(PetscOptionsMUMPSInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg)); 1736 if (flg) mumps->id.ICNTL(2) = icntl; 1737 CHKERRQ(PetscOptionsMUMPSInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg)); 1738 if (flg) mumps->id.ICNTL(3) = icntl; 1739 1740 CHKERRQ(PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg)); 1741 if (flg) mumps->id.ICNTL(4) = icntl; 1742 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 1743 1744 CHKERRQ(PetscOptionsMUMPSInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg)); 1745 if (flg) mumps->id.ICNTL(6) = icntl; 1746 1747 CHKERRQ(PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)","None",mumps->id.ICNTL(7),&icntl,&flg)); 1748 if (flg) { 1749 PetscCheckFalse(icntl == 1 || icntl < 0 || icntl > 7,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 1750 mumps->id.ICNTL(7) = icntl; 1751 } 1752 1753 CHKERRQ(PetscOptionsMUMPSInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL)); 1754 /* CHKERRQ(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 1755 CHKERRQ(PetscOptionsMUMPSInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL)); 1756 CHKERRQ(PetscOptionsMUMPSInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL)); 1757 CHKERRQ(PetscOptionsMUMPSInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL)); 1758 CHKERRQ(PetscOptionsMUMPSInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL)); 1759 CHKERRQ(PetscOptionsMUMPSInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL)); 1760 CHKERRQ(PetscOptionsMUMPSInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL)); 1761 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 1762 CHKERRQ(MatDestroy(&F->schur)); 1763 CHKERRQ(MatMumpsResetSchur_Private(mumps)); 1764 } 1765 1766 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 1767 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 1768 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 1769 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 1770 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 1771 In short, we could not use distributed RHS with MPICH until v4.0b1. 1772 */ 1773 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101)) 1774 mumps->ICNTL20 = 0; /* Centralized dense RHS*/ 1775 #else 1776 mumps->ICNTL20 = 10; /* Distributed dense RHS*/ 1777 #endif 1778 CHKERRQ(PetscOptionsMUMPSInt("-mat_mumps_icntl_20","ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides","None",mumps->ICNTL20,&mumps->ICNTL20,&flg)); 1779 PetscCheckFalse(flg && mumps->ICNTL20 != 10 && mumps->ICNTL20 != 0,PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10",(int)mumps->ICNTL20); 1780 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0) 1781 PetscCheckFalse(flg && mumps->ICNTL20 == 10,PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=10 is not supported before MUMPS-5.3.0"); 1782 #endif 1783 /* CHKERRQ(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */ 1784 1785 CHKERRQ(PetscOptionsMUMPSInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL)); 1786 CHKERRQ(PetscOptionsMUMPSInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL)); 1787 CHKERRQ(PetscOptionsMUMPSInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL)); 1788 if (mumps->id.ICNTL(24)) { 1789 mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ 1790 } 1791 1792 CHKERRQ(PetscOptionsMUMPSInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL)); 1793 CHKERRQ(PetscOptionsMUMPSInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL)); 1794 CHKERRQ(PetscOptionsMUMPSInt("-mat_mumps_icntl_27","ICNTL(27): controls the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL)); 1795 CHKERRQ(PetscOptionsMUMPSInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL)); 1796 CHKERRQ(PetscOptionsMUMPSInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL)); 1797 /* CHKERRQ(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */ 1798 CHKERRQ(PetscOptionsMUMPSInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL)); 1799 /* CHKERRQ(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */ 1800 CHKERRQ(PetscOptionsMUMPSInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL)); 1801 CHKERRQ(PetscOptionsMUMPSInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL)); 1802 CHKERRQ(PetscOptionsMUMPSInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL)); 1803 CHKERRQ(PetscOptionsMUMPSInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL)); 1804 1805 CHKERRQ(PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL)); 1806 CHKERRQ(PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL)); 1807 CHKERRQ(PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL)); 1808 CHKERRQ(PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL)); 1809 CHKERRQ(PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL)); 1810 CHKERRQ(PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL)); 1811 1812 CHKERRQ(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL)); 1813 1814 CHKERRQ(PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL)); 1815 if (ninfo) { 1816 PetscCheckFalse(ninfo > 80,PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %" PetscInt_FMT " must <= 80",ninfo); 1817 CHKERRQ(PetscMalloc1(ninfo,&mumps->info)); 1818 mumps->ninfo = ninfo; 1819 for (i=0; i<ninfo; i++) { 1820 PetscCheckFalse(info[i] < 0 || info[i]>80,PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %" PetscInt_FMT " must between 1 and 80",ninfo); 1821 else mumps->info[i] = info[i]; 1822 } 1823 } 1824 1825 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1826 PetscFunctionReturn(0); 1827 } 1828 1829 PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps) 1830 { 1831 PetscInt nthreads=0; 1832 1833 PetscFunctionBegin; 1834 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1835 CHKERRMPI(MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size)); 1836 CHKERRMPI(MPI_Comm_rank(mumps->petsc_comm,&mumps->myid));/* "if (!myid)" still works even if mumps_comm is different */ 1837 1838 CHKERRQ(PetscOptionsHasName(NULL,((PetscObject)A)->prefix,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support)); 1839 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1840 CHKERRQ(PetscOptionsGetInt(NULL,((PetscObject)A)->prefix,"-mat_mumps_use_omp_threads",&nthreads,NULL)); 1841 if (mumps->use_petsc_omp_support) { 1842 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1843 CHKERRQ(PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl)); 1844 CHKERRQ(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master)); 1845 #else 1846 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",((PetscObject)A)->prefix?((PetscObject)A)->prefix:""); 1847 #endif 1848 } else { 1849 mumps->omp_comm = PETSC_COMM_SELF; 1850 mumps->mumps_comm = mumps->petsc_comm; 1851 mumps->is_omp_master = PETSC_TRUE; 1852 } 1853 CHKERRMPI(MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size)); 1854 mumps->reqs = NULL; 1855 mumps->tag = 0; 1856 1857 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */ 1858 if (mumps->mumps_comm != MPI_COMM_NULL) { 1859 CHKERRQ(PetscCommGetComm(PetscObjectComm((PetscObject)A),&mumps->mumps_comm)); 1860 } 1861 1862 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 1863 mumps->id.job = JOB_INIT; 1864 mumps->id.par = 1; /* host participates factorizaton and solve */ 1865 mumps->id.sym = mumps->sym; 1866 1867 PetscMUMPS_c(mumps); 1868 PetscCheckFalse(mumps->id.INFOG(1) < 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in PetscInitializeMUMPS: INFOG(1)=%d",mumps->id.INFOG(1)); 1869 1870 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 1871 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 1872 */ 1873 CHKERRMPI(MPI_Bcast(mumps->id.icntl,40,MPI_INT, 0,mumps->omp_comm)); 1874 CHKERRMPI(MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm)); 1875 1876 mumps->scat_rhs = NULL; 1877 mumps->scat_sol = NULL; 1878 1879 /* set PETSc-MUMPS default options - override MUMPS default */ 1880 mumps->id.ICNTL(3) = 0; 1881 mumps->id.ICNTL(4) = 0; 1882 if (mumps->petsc_size == 1) { 1883 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 1884 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 1885 } else { 1886 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 1887 mumps->id.ICNTL(21) = 1; /* distributed solution */ 1888 } 1889 1890 /* schur */ 1891 mumps->id.size_schur = 0; 1892 mumps->id.listvar_schur = NULL; 1893 mumps->id.schur = NULL; 1894 mumps->sizeredrhs = 0; 1895 mumps->schur_sol = NULL; 1896 mumps->schur_sizesol = 0; 1897 PetscFunctionReturn(0); 1898 } 1899 1900 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps) 1901 { 1902 PetscFunctionBegin; 1903 if (mumps->id.INFOG(1) < 0) { 1904 if (A->erroriffailure) { 1905 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d",mumps->id.INFOG(1)); 1906 } else { 1907 if (mumps->id.INFOG(1) == -6) { 1908 CHKERRQ(PetscInfo(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2))); 1909 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 1910 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 1911 CHKERRQ(PetscInfo(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2))); 1912 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1913 } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) { 1914 CHKERRQ(PetscInfo(F,"Empty matrix\n")); 1915 } else { 1916 CHKERRQ(PetscInfo(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2))); 1917 F->factorerrortype = MAT_FACTOR_OTHER; 1918 } 1919 } 1920 } 1921 PetscFunctionReturn(0); 1922 } 1923 1924 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1925 { 1926 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1927 Vec b; 1928 const PetscInt M = A->rmap->N; 1929 1930 PetscFunctionBegin; 1931 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 1932 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 1933 PetscFunctionReturn(0); 1934 } 1935 1936 /* Set MUMPS options from the options database */ 1937 CHKERRQ(PetscSetMUMPSFromOptions(F,A)); 1938 1939 CHKERRQ((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 1940 CHKERRQ(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps)); 1941 1942 /* analysis phase */ 1943 /*----------------*/ 1944 mumps->id.job = JOB_FACTSYMBOLIC; 1945 mumps->id.n = M; 1946 switch (mumps->id.ICNTL(18)) { 1947 case 0: /* centralized assembled matrix input */ 1948 if (!mumps->myid) { 1949 mumps->id.nnz = mumps->nnz; 1950 mumps->id.irn = mumps->irn; 1951 mumps->id.jcn = mumps->jcn; 1952 if (mumps->id.ICNTL(6)>1) mumps->id.a = (MumpsScalar*)mumps->val; 1953 if (r) { 1954 mumps->id.ICNTL(7) = 1; 1955 if (!mumps->myid) { 1956 const PetscInt *idx; 1957 PetscInt i; 1958 1959 CHKERRQ(PetscMalloc1(M,&mumps->id.perm_in)); 1960 CHKERRQ(ISGetIndices(r,&idx)); 1961 for (i=0; i<M; i++) CHKERRQ(PetscMUMPSIntCast(idx[i]+1,&(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */ 1962 CHKERRQ(ISRestoreIndices(r,&idx)); 1963 } 1964 } 1965 } 1966 break; 1967 case 3: /* distributed assembled matrix input (size>1) */ 1968 mumps->id.nnz_loc = mumps->nnz; 1969 mumps->id.irn_loc = mumps->irn; 1970 mumps->id.jcn_loc = mumps->jcn; 1971 if (mumps->id.ICNTL(6)>1) mumps->id.a_loc = (MumpsScalar*)mumps->val; 1972 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1973 CHKERRQ(MatCreateVecs(A,NULL,&b)); 1974 CHKERRQ(VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq)); 1975 CHKERRQ(VecDestroy(&b)); 1976 } 1977 break; 1978 } 1979 PetscMUMPS_c(mumps); 1980 CHKERRQ(MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps)); 1981 1982 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1983 F->ops->solve = MatSolve_MUMPS; 1984 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1985 F->ops->matsolve = MatMatSolve_MUMPS; 1986 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1987 1988 mumps->matstruc = SAME_NONZERO_PATTERN; 1989 PetscFunctionReturn(0); 1990 } 1991 1992 /* Note the Petsc r and c permutations are ignored */ 1993 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1994 { 1995 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1996 Vec b; 1997 const PetscInt M = A->rmap->N; 1998 1999 PetscFunctionBegin; 2000 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2001 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2002 PetscFunctionReturn(0); 2003 } 2004 2005 /* Set MUMPS options from the options database */ 2006 CHKERRQ(PetscSetMUMPSFromOptions(F,A)); 2007 2008 CHKERRQ((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2009 CHKERRQ(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps)); 2010 2011 /* analysis phase */ 2012 /*----------------*/ 2013 mumps->id.job = JOB_FACTSYMBOLIC; 2014 mumps->id.n = M; 2015 switch (mumps->id.ICNTL(18)) { 2016 case 0: /* centralized assembled matrix input */ 2017 if (!mumps->myid) { 2018 mumps->id.nnz = mumps->nnz; 2019 mumps->id.irn = mumps->irn; 2020 mumps->id.jcn = mumps->jcn; 2021 if (mumps->id.ICNTL(6)>1) { 2022 mumps->id.a = (MumpsScalar*)mumps->val; 2023 } 2024 } 2025 break; 2026 case 3: /* distributed assembled matrix input (size>1) */ 2027 mumps->id.nnz_loc = mumps->nnz; 2028 mumps->id.irn_loc = mumps->irn; 2029 mumps->id.jcn_loc = mumps->jcn; 2030 if (mumps->id.ICNTL(6)>1) { 2031 mumps->id.a_loc = (MumpsScalar*)mumps->val; 2032 } 2033 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2034 CHKERRQ(MatCreateVecs(A,NULL,&b)); 2035 CHKERRQ(VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq)); 2036 CHKERRQ(VecDestroy(&b)); 2037 } 2038 break; 2039 } 2040 PetscMUMPS_c(mumps); 2041 CHKERRQ(MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps)); 2042 2043 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2044 F->ops->solve = MatSolve_MUMPS; 2045 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2046 2047 mumps->matstruc = SAME_NONZERO_PATTERN; 2048 PetscFunctionReturn(0); 2049 } 2050 2051 /* Note the Petsc r permutation and factor info are ignored */ 2052 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info) 2053 { 2054 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 2055 Vec b; 2056 const PetscInt M = A->rmap->N; 2057 2058 PetscFunctionBegin; 2059 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2060 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2061 PetscFunctionReturn(0); 2062 } 2063 2064 /* Set MUMPS options from the options database */ 2065 CHKERRQ(PetscSetMUMPSFromOptions(F,A)); 2066 2067 CHKERRQ((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2068 CHKERRQ(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps)); 2069 2070 /* analysis phase */ 2071 /*----------------*/ 2072 mumps->id.job = JOB_FACTSYMBOLIC; 2073 mumps->id.n = M; 2074 switch (mumps->id.ICNTL(18)) { 2075 case 0: /* centralized assembled matrix input */ 2076 if (!mumps->myid) { 2077 mumps->id.nnz = mumps->nnz; 2078 mumps->id.irn = mumps->irn; 2079 mumps->id.jcn = mumps->jcn; 2080 if (mumps->id.ICNTL(6)>1) { 2081 mumps->id.a = (MumpsScalar*)mumps->val; 2082 } 2083 } 2084 break; 2085 case 3: /* distributed assembled matrix input (size>1) */ 2086 mumps->id.nnz_loc = mumps->nnz; 2087 mumps->id.irn_loc = mumps->irn; 2088 mumps->id.jcn_loc = mumps->jcn; 2089 if (mumps->id.ICNTL(6)>1) { 2090 mumps->id.a_loc = (MumpsScalar*)mumps->val; 2091 } 2092 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2093 CHKERRQ(MatCreateVecs(A,NULL,&b)); 2094 CHKERRQ(VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq)); 2095 CHKERRQ(VecDestroy(&b)); 2096 } 2097 break; 2098 } 2099 PetscMUMPS_c(mumps); 2100 CHKERRQ(MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps)); 2101 2102 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2103 F->ops->solve = MatSolve_MUMPS; 2104 F->ops->solvetranspose = MatSolve_MUMPS; 2105 F->ops->matsolve = MatMatSolve_MUMPS; 2106 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2107 #if defined(PETSC_USE_COMPLEX) 2108 F->ops->getinertia = NULL; 2109 #else 2110 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2111 #endif 2112 2113 mumps->matstruc = SAME_NONZERO_PATTERN; 2114 PetscFunctionReturn(0); 2115 } 2116 2117 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) 2118 { 2119 PetscBool iascii; 2120 PetscViewerFormat format; 2121 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 2122 2123 PetscFunctionBegin; 2124 /* check if matrix is mumps type */ 2125 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0); 2126 2127 CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 2128 if (iascii) { 2129 CHKERRQ(PetscViewerGetFormat(viewer,&format)); 2130 if (format == PETSC_VIEWER_ASCII_INFO) { 2131 CHKERRQ(PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n")); 2132 CHKERRQ(PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d\n",mumps->id.sym)); 2133 CHKERRQ(PetscViewerASCIIPrintf(viewer," PAR (host participation): %d\n",mumps->id.par)); 2134 CHKERRQ(PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d\n",mumps->id.ICNTL(1))); 2135 CHKERRQ(PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d\n",mumps->id.ICNTL(2))); 2136 CHKERRQ(PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d\n",mumps->id.ICNTL(3))); 2137 CHKERRQ(PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d\n",mumps->id.ICNTL(4))); 2138 CHKERRQ(PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d\n",mumps->id.ICNTL(5))); 2139 CHKERRQ(PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d\n",mumps->id.ICNTL(6))); 2140 CHKERRQ(PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d\n",mumps->id.ICNTL(7))); 2141 CHKERRQ(PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d\n",mumps->id.ICNTL(8))); 2142 CHKERRQ(PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d\n",mumps->id.ICNTL(10))); 2143 CHKERRQ(PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d\n",mumps->id.ICNTL(11))); 2144 if (mumps->id.ICNTL(11)>0) { 2145 CHKERRQ(PetscViewerASCIIPrintf(viewer," RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4))); 2146 CHKERRQ(PetscViewerASCIIPrintf(viewer," RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5))); 2147 CHKERRQ(PetscViewerASCIIPrintf(viewer," RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6))); 2148 CHKERRQ(PetscViewerASCIIPrintf(viewer," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8))); 2149 CHKERRQ(PetscViewerASCIIPrintf(viewer," RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9))); 2150 CHKERRQ(PetscViewerASCIIPrintf(viewer," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11))); 2151 } 2152 CHKERRQ(PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d\n",mumps->id.ICNTL(12))); 2153 CHKERRQ(PetscViewerASCIIPrintf(viewer," ICNTL(13) (sequential factorization of the root node): %d\n",mumps->id.ICNTL(13))); 2154 CHKERRQ(PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d\n",mumps->id.ICNTL(14))); 2155 /* ICNTL(15-17) not used */ 2156 CHKERRQ(PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d\n",mumps->id.ICNTL(18))); 2157 CHKERRQ(PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d\n",mumps->id.ICNTL(19))); 2158 CHKERRQ(PetscViewerASCIIPrintf(viewer," ICNTL(20) (RHS sparse pattern): %d\n",mumps->id.ICNTL(20))); 2159 CHKERRQ(PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d\n",mumps->id.ICNTL(21))); 2160 CHKERRQ(PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d\n",mumps->id.ICNTL(22))); 2161 CHKERRQ(PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory can be allocated locally):%d\n",mumps->id.ICNTL(23))); 2162 2163 CHKERRQ(PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d\n",mumps->id.ICNTL(24))); 2164 CHKERRQ(PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d\n",mumps->id.ICNTL(25))); 2165 CHKERRQ(PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for RHS or solution): %d\n",mumps->id.ICNTL(26))); 2166 CHKERRQ(PetscViewerASCIIPrintf(viewer," ICNTL(27) (blocking size for multiple RHS): %d\n",mumps->id.ICNTL(27))); 2167 CHKERRQ(PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d\n",mumps->id.ICNTL(28))); 2168 CHKERRQ(PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d\n",mumps->id.ICNTL(29))); 2169 2170 CHKERRQ(PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d\n",mumps->id.ICNTL(30))); 2171 CHKERRQ(PetscViewerASCIIPrintf(viewer," ICNTL(31) (factors is discarded in the solve phase): %d\n",mumps->id.ICNTL(31))); 2172 CHKERRQ(PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d\n",mumps->id.ICNTL(33))); 2173 CHKERRQ(PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d\n",mumps->id.ICNTL(35))); 2174 CHKERRQ(PetscViewerASCIIPrintf(viewer," ICNTL(36) (choice of BLR factorization variant): %d\n",mumps->id.ICNTL(36))); 2175 CHKERRQ(PetscViewerASCIIPrintf(viewer," ICNTL(38) (estimated compression rate of LU factors): %d\n",mumps->id.ICNTL(38))); 2176 2177 CHKERRQ(PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1))); 2178 CHKERRQ(PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2))); 2179 CHKERRQ(PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3))); 2180 CHKERRQ(PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4))); 2181 CHKERRQ(PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5))); 2182 CHKERRQ(PetscViewerASCIIPrintf(viewer," CNTL(7) (dropping parameter for BLR): %g \n",mumps->id.CNTL(7))); 2183 2184 /* information local to each processor */ 2185 CHKERRQ(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n")); 2186 CHKERRQ(PetscViewerASCIIPushSynchronized(viewer)); 2187 CHKERRQ(PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(1))); 2188 CHKERRQ(PetscViewerFlush(viewer)); 2189 CHKERRQ(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n")); 2190 CHKERRQ(PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(2))); 2191 CHKERRQ(PetscViewerFlush(viewer)); 2192 CHKERRQ(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n")); 2193 CHKERRQ(PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(3))); 2194 CHKERRQ(PetscViewerFlush(viewer)); 2195 2196 CHKERRQ(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n")); 2197 CHKERRQ(PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(15))); 2198 CHKERRQ(PetscViewerFlush(viewer)); 2199 2200 CHKERRQ(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n")); 2201 CHKERRQ(PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(16))); 2202 CHKERRQ(PetscViewerFlush(viewer)); 2203 2204 CHKERRQ(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n")); 2205 CHKERRQ(PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(23))); 2206 CHKERRQ(PetscViewerFlush(viewer)); 2207 2208 if (mumps->ninfo && mumps->ninfo <= 80) { 2209 PetscInt i; 2210 for (i=0; i<mumps->ninfo; i++) { 2211 CHKERRQ(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "): \n",mumps->info[i])); 2212 CHKERRQ(PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(mumps->info[i]))); 2213 CHKERRQ(PetscViewerFlush(viewer)); 2214 } 2215 } 2216 CHKERRQ(PetscViewerASCIIPopSynchronized(viewer)); 2217 2218 if (!mumps->myid) { /* information from the host */ 2219 CHKERRQ(PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1))); 2220 CHKERRQ(PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2))); 2221 CHKERRQ(PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3))); 2222 CHKERRQ(PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34))); 2223 2224 CHKERRQ(PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(3))); 2225 CHKERRQ(PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(4))); 2226 CHKERRQ(PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d\n",mumps->id.INFOG(5))); 2227 CHKERRQ(PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d\n",mumps->id.INFOG(6))); 2228 CHKERRQ(PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively used after analysis): %d\n",mumps->id.INFOG(7))); 2229 CHKERRQ(PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n",mumps->id.INFOG(8))); 2230 CHKERRQ(PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n",mumps->id.INFOG(9))); 2231 CHKERRQ(PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d\n",mumps->id.INFOG(10))); 2232 CHKERRQ(PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d\n",mumps->id.INFOG(11))); 2233 CHKERRQ(PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d\n",mumps->id.INFOG(12))); 2234 CHKERRQ(PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d\n",mumps->id.INFOG(13))); 2235 CHKERRQ(PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d\n",mumps->id.INFOG(14))); 2236 CHKERRQ(PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d\n",mumps->id.INFOG(15))); 2237 CHKERRQ(PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n",mumps->id.INFOG(16))); 2238 CHKERRQ(PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n",mumps->id.INFOG(17))); 2239 CHKERRQ(PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n",mumps->id.INFOG(18))); 2240 CHKERRQ(PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n",mumps->id.INFOG(19))); 2241 CHKERRQ(PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d\n",mumps->id.INFOG(20))); 2242 CHKERRQ(PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n",mumps->id.INFOG(21))); 2243 CHKERRQ(PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n",mumps->id.INFOG(22))); 2244 CHKERRQ(PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n",mumps->id.INFOG(23))); 2245 CHKERRQ(PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n",mumps->id.INFOG(24))); 2246 CHKERRQ(PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n",mumps->id.INFOG(25))); 2247 CHKERRQ(PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28))); 2248 CHKERRQ(PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29))); 2249 CHKERRQ(PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31))); 2250 CHKERRQ(PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32))); 2251 CHKERRQ(PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33))); 2252 CHKERRQ(PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34))); 2253 CHKERRQ(PetscViewerASCIIPrintf(viewer," INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35))); 2254 CHKERRQ(PetscViewerASCIIPrintf(viewer," INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(36))); 2255 CHKERRQ(PetscViewerASCIIPrintf(viewer," INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n",mumps->id.INFOG(37))); 2256 CHKERRQ(PetscViewerASCIIPrintf(viewer," INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(38))); 2257 CHKERRQ(PetscViewerASCIIPrintf(viewer," INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n",mumps->id.INFOG(39))); 2258 } 2259 } 2260 } 2261 PetscFunctionReturn(0); 2262 } 2263 2264 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info) 2265 { 2266 Mat_MUMPS *mumps =(Mat_MUMPS*)A->data; 2267 2268 PetscFunctionBegin; 2269 info->block_size = 1.0; 2270 info->nz_allocated = mumps->id.INFOG(20); 2271 info->nz_used = mumps->id.INFOG(20); 2272 info->nz_unneeded = 0.0; 2273 info->assemblies = 0.0; 2274 info->mallocs = 0.0; 2275 info->memory = 0.0; 2276 info->fill_ratio_given = 0; 2277 info->fill_ratio_needed = 0; 2278 info->factor_mallocs = 0; 2279 PetscFunctionReturn(0); 2280 } 2281 2282 /* -------------------------------------------------------------------------------------------*/ 2283 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 2284 { 2285 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2286 const PetscScalar *arr; 2287 const PetscInt *idxs; 2288 PetscInt size,i; 2289 2290 PetscFunctionBegin; 2291 CHKERRQ(ISGetLocalSize(is,&size)); 2292 if (mumps->petsc_size > 1) { 2293 PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */ 2294 2295 ls = mumps->myid ? (size ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 2296 CHKERRMPI(MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm)); 2297 PetscCheck(gs,PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc"); 2298 } 2299 2300 /* Schur complement matrix */ 2301 CHKERRQ(MatDestroy(&F->schur)); 2302 CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur)); 2303 CHKERRQ(MatDenseGetArrayRead(F->schur,&arr)); 2304 mumps->id.schur = (MumpsScalar*)arr; 2305 mumps->id.size_schur = size; 2306 mumps->id.schur_lld = size; 2307 CHKERRQ(MatDenseRestoreArrayRead(F->schur,&arr)); 2308 if (mumps->sym == 1) { 2309 CHKERRQ(MatSetOption(F->schur,MAT_SPD,PETSC_TRUE)); 2310 } 2311 2312 /* MUMPS expects Fortran style indices */ 2313 CHKERRQ(PetscFree(mumps->id.listvar_schur)); 2314 CHKERRQ(PetscMalloc1(size,&mumps->id.listvar_schur)); 2315 CHKERRQ(ISGetIndices(is,&idxs)); 2316 for (i=0; i<size; i++) CHKERRQ(PetscMUMPSIntCast(idxs[i]+1,&(mumps->id.listvar_schur[i]))); 2317 CHKERRQ(ISRestoreIndices(is,&idxs)); 2318 if (mumps->petsc_size > 1) { 2319 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2320 } else { 2321 if (F->factortype == MAT_FACTOR_LU) { 2322 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2323 } else { 2324 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2325 } 2326 } 2327 /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 2328 mumps->id.ICNTL(26) = -1; 2329 PetscFunctionReturn(0); 2330 } 2331 2332 /* -------------------------------------------------------------------------------------------*/ 2333 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S) 2334 { 2335 Mat St; 2336 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2337 PetscScalar *array; 2338 #if defined(PETSC_USE_COMPLEX) 2339 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 2340 #endif 2341 2342 PetscFunctionBegin; 2343 PetscCheckFalse(!mumps->id.ICNTL(19),PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 2344 CHKERRQ(MatCreate(PETSC_COMM_SELF,&St)); 2345 CHKERRQ(MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur)); 2346 CHKERRQ(MatSetType(St,MATDENSE)); 2347 CHKERRQ(MatSetUp(St)); 2348 CHKERRQ(MatDenseGetArray(St,&array)); 2349 if (!mumps->sym) { /* MUMPS always return a full matrix */ 2350 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2351 PetscInt i,j,N=mumps->id.size_schur; 2352 for (i=0;i<N;i++) { 2353 for (j=0;j<N;j++) { 2354 #if !defined(PETSC_USE_COMPLEX) 2355 PetscScalar val = mumps->id.schur[i*N+j]; 2356 #else 2357 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2358 #endif 2359 array[j*N+i] = val; 2360 } 2361 } 2362 } else { /* stored by columns */ 2363 CHKERRQ(PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur)); 2364 } 2365 } else { /* either full or lower-triangular (not packed) */ 2366 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2367 PetscInt i,j,N=mumps->id.size_schur; 2368 for (i=0;i<N;i++) { 2369 for (j=i;j<N;j++) { 2370 #if !defined(PETSC_USE_COMPLEX) 2371 PetscScalar val = mumps->id.schur[i*N+j]; 2372 #else 2373 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2374 #endif 2375 array[i*N+j] = val; 2376 array[j*N+i] = val; 2377 } 2378 } 2379 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2380 CHKERRQ(PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur)); 2381 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2382 PetscInt i,j,N=mumps->id.size_schur; 2383 for (i=0;i<N;i++) { 2384 for (j=0;j<i+1;j++) { 2385 #if !defined(PETSC_USE_COMPLEX) 2386 PetscScalar val = mumps->id.schur[i*N+j]; 2387 #else 2388 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2389 #endif 2390 array[i*N+j] = val; 2391 array[j*N+i] = val; 2392 } 2393 } 2394 } 2395 } 2396 CHKERRQ(MatDenseRestoreArray(St,&array)); 2397 *S = St; 2398 PetscFunctionReturn(0); 2399 } 2400 2401 /* -------------------------------------------------------------------------------------------*/ 2402 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival) 2403 { 2404 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2405 2406 PetscFunctionBegin; 2407 CHKERRQ(PetscMUMPSIntCast(ival,&mumps->id.ICNTL(icntl))); 2408 PetscFunctionReturn(0); 2409 } 2410 2411 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival) 2412 { 2413 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2414 2415 PetscFunctionBegin; 2416 *ival = mumps->id.ICNTL(icntl); 2417 PetscFunctionReturn(0); 2418 } 2419 2420 /*@ 2421 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2422 2423 Logically Collective on Mat 2424 2425 Input Parameters: 2426 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2427 . icntl - index of MUMPS parameter array ICNTL() 2428 - ival - value of MUMPS ICNTL(icntl) 2429 2430 Options Database: 2431 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival 2432 2433 Level: beginner 2434 2435 References: 2436 . * - MUMPS Users' Guide 2437 2438 .seealso: MatGetFactor(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2439 @*/ 2440 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival) 2441 { 2442 PetscFunctionBegin; 2443 PetscValidType(F,1); 2444 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2445 PetscValidLogicalCollectiveInt(F,icntl,2); 2446 PetscValidLogicalCollectiveInt(F,ival,3); 2447 CHKERRQ(PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival))); 2448 PetscFunctionReturn(0); 2449 } 2450 2451 /*@ 2452 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2453 2454 Logically Collective on Mat 2455 2456 Input Parameters: 2457 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2458 - icntl - index of MUMPS parameter array ICNTL() 2459 2460 Output Parameter: 2461 . ival - value of MUMPS ICNTL(icntl) 2462 2463 Level: beginner 2464 2465 References: 2466 . * - MUMPS Users' Guide 2467 2468 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2469 @*/ 2470 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival) 2471 { 2472 PetscFunctionBegin; 2473 PetscValidType(F,1); 2474 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2475 PetscValidLogicalCollectiveInt(F,icntl,2); 2476 PetscValidIntPointer(ival,3); 2477 CHKERRQ(PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival))); 2478 PetscFunctionReturn(0); 2479 } 2480 2481 /* -------------------------------------------------------------------------------------------*/ 2482 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val) 2483 { 2484 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2485 2486 PetscFunctionBegin; 2487 mumps->id.CNTL(icntl) = val; 2488 PetscFunctionReturn(0); 2489 } 2490 2491 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val) 2492 { 2493 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2494 2495 PetscFunctionBegin; 2496 *val = mumps->id.CNTL(icntl); 2497 PetscFunctionReturn(0); 2498 } 2499 2500 /*@ 2501 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2502 2503 Logically Collective on Mat 2504 2505 Input Parameters: 2506 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2507 . icntl - index of MUMPS parameter array CNTL() 2508 - val - value of MUMPS CNTL(icntl) 2509 2510 Options Database: 2511 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival 2512 2513 Level: beginner 2514 2515 References: 2516 . * - MUMPS Users' Guide 2517 2518 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2519 @*/ 2520 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val) 2521 { 2522 PetscFunctionBegin; 2523 PetscValidType(F,1); 2524 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2525 PetscValidLogicalCollectiveInt(F,icntl,2); 2526 PetscValidLogicalCollectiveReal(F,val,3); 2527 CHKERRQ(PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val))); 2528 PetscFunctionReturn(0); 2529 } 2530 2531 /*@ 2532 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2533 2534 Logically Collective on Mat 2535 2536 Input Parameters: 2537 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2538 - icntl - index of MUMPS parameter array CNTL() 2539 2540 Output Parameter: 2541 . val - value of MUMPS CNTL(icntl) 2542 2543 Level: beginner 2544 2545 References: 2546 . * - MUMPS Users' Guide 2547 2548 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2549 @*/ 2550 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val) 2551 { 2552 PetscFunctionBegin; 2553 PetscValidType(F,1); 2554 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2555 PetscValidLogicalCollectiveInt(F,icntl,2); 2556 PetscValidRealPointer(val,3); 2557 CHKERRQ(PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val))); 2558 PetscFunctionReturn(0); 2559 } 2560 2561 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info) 2562 { 2563 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2564 2565 PetscFunctionBegin; 2566 *info = mumps->id.INFO(icntl); 2567 PetscFunctionReturn(0); 2568 } 2569 2570 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog) 2571 { 2572 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2573 2574 PetscFunctionBegin; 2575 *infog = mumps->id.INFOG(icntl); 2576 PetscFunctionReturn(0); 2577 } 2578 2579 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo) 2580 { 2581 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2582 2583 PetscFunctionBegin; 2584 *rinfo = mumps->id.RINFO(icntl); 2585 PetscFunctionReturn(0); 2586 } 2587 2588 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog) 2589 { 2590 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2591 2592 PetscFunctionBegin; 2593 *rinfog = mumps->id.RINFOG(icntl); 2594 PetscFunctionReturn(0); 2595 } 2596 2597 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS) 2598 { 2599 Mat Bt = NULL,Btseq = NULL; 2600 PetscBool flg; 2601 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2602 PetscScalar *aa; 2603 PetscInt spnr,*ia,*ja,M,nrhs; 2604 2605 PetscFunctionBegin; 2606 PetscValidPointer(spRHS,2); 2607 CHKERRQ(PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg)); 2608 if (flg) { 2609 CHKERRQ(MatTransposeGetMat(spRHS,&Bt)); 2610 } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be type MATTRANSPOSEMAT matrix"); 2611 2612 CHKERRQ(MatMumpsSetIcntl(F,30,1)); 2613 2614 if (mumps->petsc_size > 1) { 2615 Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data; 2616 Btseq = b->A; 2617 } else { 2618 Btseq = Bt; 2619 } 2620 2621 CHKERRQ(MatGetSize(spRHS,&M,&nrhs)); 2622 mumps->id.nrhs = nrhs; 2623 mumps->id.lrhs = M; 2624 mumps->id.rhs = NULL; 2625 2626 if (!mumps->myid) { 2627 CHKERRQ(MatSeqAIJGetArray(Btseq,&aa)); 2628 CHKERRQ(MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg)); 2629 PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2630 CHKERRQ(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs)); 2631 mumps->id.rhs_sparse = (MumpsScalar*)aa; 2632 } else { 2633 mumps->id.irhs_ptr = NULL; 2634 mumps->id.irhs_sparse = NULL; 2635 mumps->id.nz_rhs = 0; 2636 mumps->id.rhs_sparse = NULL; 2637 } 2638 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2639 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2640 2641 /* solve phase */ 2642 /*-------------*/ 2643 mumps->id.job = JOB_SOLVE; 2644 PetscMUMPS_c(mumps); 2645 if (mumps->id.INFOG(1) < 0) 2646 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d",mumps->id.INFOG(1),mumps->id.INFO(2)); 2647 2648 if (!mumps->myid) { 2649 CHKERRQ(MatSeqAIJRestoreArray(Btseq,&aa)); 2650 CHKERRQ(MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg)); 2651 PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2652 } 2653 PetscFunctionReturn(0); 2654 } 2655 2656 /*@ 2657 MatMumpsGetInverse - Get user-specified set of entries in inverse of A 2658 2659 Logically Collective on Mat 2660 2661 Input Parameters: 2662 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2663 - spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0] 2664 2665 Output Parameter: 2666 . spRHS - requested entries of inverse of A 2667 2668 Level: beginner 2669 2670 References: 2671 . * - MUMPS Users' Guide 2672 2673 .seealso: MatGetFactor(), MatCreateTranspose() 2674 @*/ 2675 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS) 2676 { 2677 PetscFunctionBegin; 2678 PetscValidType(F,1); 2679 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2680 CHKERRQ(PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS))); 2681 PetscFunctionReturn(0); 2682 } 2683 2684 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST) 2685 { 2686 Mat spRHS; 2687 2688 PetscFunctionBegin; 2689 CHKERRQ(MatCreateTranspose(spRHST,&spRHS)); 2690 CHKERRQ(MatMumpsGetInverse_MUMPS(F,spRHS)); 2691 CHKERRQ(MatDestroy(&spRHS)); 2692 PetscFunctionReturn(0); 2693 } 2694 2695 /*@ 2696 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T 2697 2698 Logically Collective on Mat 2699 2700 Input Parameters: 2701 + F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface 2702 - spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0] 2703 2704 Output Parameter: 2705 . spRHST - requested entries of inverse of A^T 2706 2707 Level: beginner 2708 2709 References: 2710 . * - MUMPS Users' Guide 2711 2712 .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse() 2713 @*/ 2714 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST) 2715 { 2716 PetscBool flg; 2717 2718 PetscFunctionBegin; 2719 PetscValidType(F,1); 2720 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2721 CHKERRQ(PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL)); 2722 PetscCheck(flg,PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix"); 2723 2724 CHKERRQ(PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST))); 2725 PetscFunctionReturn(0); 2726 } 2727 2728 /*@ 2729 MatMumpsGetInfo - Get MUMPS parameter INFO() 2730 2731 Logically Collective on Mat 2732 2733 Input Parameters: 2734 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2735 - icntl - index of MUMPS parameter array INFO() 2736 2737 Output Parameter: 2738 . ival - value of MUMPS INFO(icntl) 2739 2740 Level: beginner 2741 2742 References: 2743 . * - MUMPS Users' Guide 2744 2745 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2746 @*/ 2747 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival) 2748 { 2749 PetscFunctionBegin; 2750 PetscValidType(F,1); 2751 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2752 PetscValidIntPointer(ival,3); 2753 CHKERRQ(PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival))); 2754 PetscFunctionReturn(0); 2755 } 2756 2757 /*@ 2758 MatMumpsGetInfog - Get MUMPS parameter INFOG() 2759 2760 Logically Collective on Mat 2761 2762 Input Parameters: 2763 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2764 - icntl - index of MUMPS parameter array INFOG() 2765 2766 Output Parameter: 2767 . ival - value of MUMPS INFOG(icntl) 2768 2769 Level: beginner 2770 2771 References: 2772 . * - MUMPS Users' Guide 2773 2774 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2775 @*/ 2776 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival) 2777 { 2778 PetscFunctionBegin; 2779 PetscValidType(F,1); 2780 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2781 PetscValidIntPointer(ival,3); 2782 CHKERRQ(PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival))); 2783 PetscFunctionReturn(0); 2784 } 2785 2786 /*@ 2787 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 2788 2789 Logically Collective on Mat 2790 2791 Input Parameters: 2792 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2793 - icntl - index of MUMPS parameter array RINFO() 2794 2795 Output Parameter: 2796 . val - value of MUMPS RINFO(icntl) 2797 2798 Level: beginner 2799 2800 References: 2801 . * - MUMPS Users' Guide 2802 2803 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfog() 2804 @*/ 2805 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val) 2806 { 2807 PetscFunctionBegin; 2808 PetscValidType(F,1); 2809 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2810 PetscValidRealPointer(val,3); 2811 CHKERRQ(PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val))); 2812 PetscFunctionReturn(0); 2813 } 2814 2815 /*@ 2816 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 2817 2818 Logically Collective on Mat 2819 2820 Input Parameters: 2821 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2822 - icntl - index of MUMPS parameter array RINFOG() 2823 2824 Output Parameter: 2825 . val - value of MUMPS RINFOG(icntl) 2826 2827 Level: beginner 2828 2829 References: 2830 . * - MUMPS Users' Guide 2831 2832 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo() 2833 @*/ 2834 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val) 2835 { 2836 PetscFunctionBegin; 2837 PetscValidType(F,1); 2838 PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2839 PetscValidRealPointer(val,3); 2840 CHKERRQ(PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val))); 2841 PetscFunctionReturn(0); 2842 } 2843 2844 /*MC 2845 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 2846 distributed and sequential matrices via the external package MUMPS. 2847 2848 Works with MATAIJ and MATSBAIJ matrices 2849 2850 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 2851 2852 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below. 2853 2854 Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver 2855 2856 Options Database Keys: 2857 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 2858 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 2859 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 2860 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 2861 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 2862 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 2863 Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 2864 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 2865 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 2866 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 2867 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 2868 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 2869 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 2870 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 2871 . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS 2872 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 2873 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can allocate per processor 2874 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 2875 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 2876 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix 2877 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering 2878 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 2879 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 2880 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 2881 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 2882 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 2883 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 2884 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 2885 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 2886 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 2887 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 2888 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 2889 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 2890 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 2891 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 2892 Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 2893 2894 Level: beginner 2895 2896 Notes: 2897 MUMPS Cholesky does not handle (complex) Hermitian matrices http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf so using it will error if the matrix is Hermitian. 2898 2899 When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling 2900 $ KSPGetPC(ksp,&pc); 2901 $ PCFactorGetMatrix(pc,&mat); 2902 $ MatMumpsGetInfo(mat,....); 2903 $ MatMumpsGetInfog(mat,....); etc. 2904 Or you can run with -ksp_error_if_not_converged and the program will be stopped and the information printed in the error message. 2905 2906 Using MUMPS with 64-bit integers 2907 MUMPS provides 64-bit integer support in two build modes: 2908 full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and 2909 requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI). 2910 2911 selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices, 2912 MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and 2913 columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit 2914 integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS. 2915 2916 With --download-mumps=1, PETSc always build MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc. 2917 2918 Two modes to run MUMPS/PETSc with OpenMP 2919 $ Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 2920 $ threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". 2921 2922 $ -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example, 2923 $ if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 2924 2925 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 2926 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc 2927 (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 2928 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 2929 (PETSc will automatically try to utilized a threaded BLAS if --with-openmp is provided). 2930 2931 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 2932 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 2933 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 2934 are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set 2935 by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs. 2936 In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets, 2937 if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind 2938 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half 2939 cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 2940 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 2941 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 2942 examine the mapping result. 2943 2944 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts, 2945 for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc 2946 calls omp_set_num_threads(m) internally before calling MUMPS. 2947 2948 References: 2949 + * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 2950 - * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017. 2951 2952 .seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCFactorGetMatrix() 2953 2954 M*/ 2955 2956 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type) 2957 { 2958 PetscFunctionBegin; 2959 *type = MATSOLVERMUMPS; 2960 PetscFunctionReturn(0); 2961 } 2962 2963 /* MatGetFactor for Seq and MPI AIJ matrices */ 2964 static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F) 2965 { 2966 Mat B; 2967 Mat_MUMPS *mumps; 2968 PetscBool isSeqAIJ; 2969 PetscMPIInt size; 2970 2971 PetscFunctionBegin; 2972 #if defined(PETSC_USE_COMPLEX) 2973 PetscCheckFalse(A->hermitian && !A->symmetric && ftype == MAT_FACTOR_CHOLESKY,PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported"); 2974 #endif 2975 /* Create the factorization matrix */ 2976 CHKERRQ(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ)); 2977 CHKERRQ(MatCreate(PetscObjectComm((PetscObject)A),&B)); 2978 CHKERRQ(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 2979 CHKERRQ(PetscStrallocpy("mumps",&((PetscObject)B)->type_name)); 2980 CHKERRQ(MatSetUp(B)); 2981 2982 CHKERRQ(PetscNewLog(B,&mumps)); 2983 2984 B->ops->view = MatView_MUMPS; 2985 B->ops->getinfo = MatGetInfo_MUMPS; 2986 2987 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps)); 2988 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS)); 2989 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS)); 2990 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS)); 2991 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS)); 2992 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS)); 2993 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS)); 2994 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS)); 2995 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS)); 2996 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS)); 2997 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS)); 2998 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS)); 2999 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS)); 3000 3001 if (ftype == MAT_FACTOR_LU) { 3002 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3003 B->factortype = MAT_FACTOR_LU; 3004 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3005 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 3006 CHKERRQ(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU])); 3007 mumps->sym = 0; 3008 } else { 3009 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3010 B->factortype = MAT_FACTOR_CHOLESKY; 3011 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3012 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 3013 CHKERRQ(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3014 #if defined(PETSC_USE_COMPLEX) 3015 mumps->sym = 2; 3016 #else 3017 if (A->spd_set && A->spd) mumps->sym = 1; 3018 else mumps->sym = 2; 3019 #endif 3020 } 3021 3022 /* set solvertype */ 3023 CHKERRQ(PetscFree(B->solvertype)); 3024 CHKERRQ(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype)); 3025 CHKERRMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 3026 if (size == 1) { 3027 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3028 B->canuseordering = PETSC_TRUE; 3029 } 3030 B->ops->destroy = MatDestroy_MUMPS; 3031 B->data = (void*)mumps; 3032 3033 CHKERRQ(PetscInitializeMUMPS(A,mumps)); 3034 3035 *F = B; 3036 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3037 PetscFunctionReturn(0); 3038 } 3039 3040 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3041 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F) 3042 { 3043 Mat B; 3044 Mat_MUMPS *mumps; 3045 PetscBool isSeqSBAIJ; 3046 PetscMPIInt size; 3047 3048 PetscFunctionBegin; 3049 #if defined(PETSC_USE_COMPLEX) 3050 PetscCheckFalse(A->hermitian && !A->symmetric,PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported"); 3051 #endif 3052 CHKERRQ(MatCreate(PetscObjectComm((PetscObject)A),&B)); 3053 CHKERRQ(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 3054 CHKERRQ(PetscStrallocpy("mumps",&((PetscObject)B)->type_name)); 3055 CHKERRQ(MatSetUp(B)); 3056 3057 CHKERRQ(PetscNewLog(B,&mumps)); 3058 CHKERRQ(PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ)); 3059 if (isSeqSBAIJ) { 3060 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3061 } else { 3062 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3063 } 3064 3065 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3066 B->ops->view = MatView_MUMPS; 3067 B->ops->getinfo = MatGetInfo_MUMPS; 3068 3069 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps)); 3070 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS)); 3071 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS)); 3072 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS)); 3073 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS)); 3074 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS)); 3075 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS)); 3076 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS)); 3077 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS)); 3078 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS)); 3079 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS)); 3080 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS)); 3081 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS)); 3082 3083 B->factortype = MAT_FACTOR_CHOLESKY; 3084 #if defined(PETSC_USE_COMPLEX) 3085 mumps->sym = 2; 3086 #else 3087 if (A->spd_set && A->spd) mumps->sym = 1; 3088 else mumps->sym = 2; 3089 #endif 3090 3091 /* set solvertype */ 3092 CHKERRQ(PetscFree(B->solvertype)); 3093 CHKERRQ(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype)); 3094 CHKERRMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 3095 if (size == 1) { 3096 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3097 B->canuseordering = PETSC_TRUE; 3098 } 3099 CHKERRQ(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 3100 B->ops->destroy = MatDestroy_MUMPS; 3101 B->data = (void*)mumps; 3102 3103 CHKERRQ(PetscInitializeMUMPS(A,mumps)); 3104 3105 *F = B; 3106 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3107 PetscFunctionReturn(0); 3108 } 3109 3110 static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F) 3111 { 3112 Mat B; 3113 Mat_MUMPS *mumps; 3114 PetscBool isSeqBAIJ; 3115 PetscMPIInt size; 3116 3117 PetscFunctionBegin; 3118 /* Create the factorization matrix */ 3119 CHKERRQ(PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ)); 3120 CHKERRQ(MatCreate(PetscObjectComm((PetscObject)A),&B)); 3121 CHKERRQ(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 3122 CHKERRQ(PetscStrallocpy("mumps",&((PetscObject)B)->type_name)); 3123 CHKERRQ(MatSetUp(B)); 3124 3125 CHKERRQ(PetscNewLog(B,&mumps)); 3126 if (ftype == MAT_FACTOR_LU) { 3127 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3128 B->factortype = MAT_FACTOR_LU; 3129 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3130 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3131 mumps->sym = 0; 3132 CHKERRQ(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU])); 3133 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 3134 3135 B->ops->view = MatView_MUMPS; 3136 B->ops->getinfo = MatGetInfo_MUMPS; 3137 3138 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps)); 3139 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS)); 3140 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS)); 3141 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS)); 3142 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS)); 3143 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS)); 3144 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS)); 3145 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS)); 3146 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS)); 3147 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS)); 3148 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS)); 3149 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS)); 3150 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS)); 3151 3152 /* set solvertype */ 3153 CHKERRQ(PetscFree(B->solvertype)); 3154 CHKERRQ(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype)); 3155 CHKERRMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 3156 if (size == 1) { 3157 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3158 B->canuseordering = PETSC_TRUE; 3159 } 3160 B->ops->destroy = MatDestroy_MUMPS; 3161 B->data = (void*)mumps; 3162 3163 CHKERRQ(PetscInitializeMUMPS(A,mumps)); 3164 3165 *F = B; 3166 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3167 PetscFunctionReturn(0); 3168 } 3169 3170 /* MatGetFactor for Seq and MPI SELL matrices */ 3171 static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F) 3172 { 3173 Mat B; 3174 Mat_MUMPS *mumps; 3175 PetscBool isSeqSELL; 3176 PetscMPIInt size; 3177 3178 PetscFunctionBegin; 3179 /* Create the factorization matrix */ 3180 CHKERRQ(PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL)); 3181 CHKERRQ(MatCreate(PetscObjectComm((PetscObject)A),&B)); 3182 CHKERRQ(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 3183 CHKERRQ(PetscStrallocpy("mumps",&((PetscObject)B)->type_name)); 3184 CHKERRQ(MatSetUp(B)); 3185 3186 CHKERRQ(PetscNewLog(B,&mumps)); 3187 3188 B->ops->view = MatView_MUMPS; 3189 B->ops->getinfo = MatGetInfo_MUMPS; 3190 3191 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps)); 3192 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS)); 3193 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS)); 3194 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS)); 3195 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS)); 3196 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS)); 3197 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS)); 3198 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS)); 3199 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS)); 3200 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS)); 3201 CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS)); 3202 3203 if (ftype == MAT_FACTOR_LU) { 3204 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3205 B->factortype = MAT_FACTOR_LU; 3206 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 3207 else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 3208 mumps->sym = 0; 3209 CHKERRQ(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU])); 3210 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 3211 3212 /* set solvertype */ 3213 CHKERRQ(PetscFree(B->solvertype)); 3214 CHKERRQ(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype)); 3215 CHKERRMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 3216 if (size == 1) { 3217 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 3218 B->canuseordering = PETSC_TRUE; 3219 } 3220 B->ops->destroy = MatDestroy_MUMPS; 3221 B->data = (void*)mumps; 3222 3223 CHKERRQ(PetscInitializeMUMPS(A,mumps)); 3224 3225 *F = B; 3226 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 3227 PetscFunctionReturn(0); 3228 } 3229 3230 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 3231 { 3232 PetscFunctionBegin; 3233 CHKERRQ(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps)); 3234 CHKERRQ(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps)); 3235 CHKERRQ(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps)); 3236 CHKERRQ(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps)); 3237 CHKERRQ(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps)); 3238 CHKERRQ(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps)); 3239 CHKERRQ(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps)); 3240 CHKERRQ(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps)); 3241 CHKERRQ(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps)); 3242 CHKERRQ(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps)); 3243 CHKERRQ(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps)); 3244 PetscFunctionReturn(0); 3245 } 3246