1 2 /* 3 Provides an interface to the MUMPS sparse solver 4 */ 5 #include <petscpkg_version.h> 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 8 #include <../src/mat/impls/sell/mpi/mpisell.h> 9 10 EXTERN_C_BEGIN 11 #if defined(PETSC_USE_COMPLEX) 12 #if defined(PETSC_USE_REAL_SINGLE) 13 #include <cmumps_c.h> 14 #else 15 #include <zmumps_c.h> 16 #endif 17 #else 18 #if defined(PETSC_USE_REAL_SINGLE) 19 #include <smumps_c.h> 20 #else 21 #include <dmumps_c.h> 22 #endif 23 #endif 24 EXTERN_C_END 25 #define JOB_INIT -1 26 #define JOB_FACTSYMBOLIC 1 27 #define JOB_FACTNUMERIC 2 28 #define JOB_SOLVE 3 29 #define JOB_END -2 30 31 /* calls to MUMPS */ 32 #if defined(PETSC_USE_COMPLEX) 33 #if defined(PETSC_USE_REAL_SINGLE) 34 #define MUMPS_c cmumps_c 35 #else 36 #define MUMPS_c zmumps_c 37 #endif 38 #else 39 #if defined(PETSC_USE_REAL_SINGLE) 40 #define MUMPS_c smumps_c 41 #else 42 #define MUMPS_c dmumps_c 43 #endif 44 #endif 45 46 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for 47 number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the 48 naming convention in PetscMPIInt, PetscBLASInt etc. 49 */ 50 typedef MUMPS_INT PetscMUMPSInt; 51 52 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0) 53 #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */ 54 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 55 #endif 56 #else 57 #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */ 58 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 59 #endif 60 #endif 61 62 #define MPIU_MUMPSINT MPI_INT 63 #define PETSC_MUMPS_INT_MAX 2147483647 64 #define PETSC_MUMPS_INT_MIN -2147483648 65 66 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers*/ 67 PETSC_STATIC_INLINE PetscErrorCode PetscMUMPSIntCast(PetscInt a,PetscMUMPSInt *b) 68 { 69 PetscFunctionBegin; 70 if (PetscDefined(USE_64BIT_INDICES) && PetscUnlikelyDebug(a > PETSC_MUMPS_INT_MAX || a < PETSC_MUMPS_INT_MIN)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt"); 71 *b = (PetscMUMPSInt)(a); 72 PetscFunctionReturn(0); 73 } 74 75 /* Put these utility routines here since they are only used in this file */ 76 PETSC_STATIC_INLINE PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject,const char opt[],const char text[],const char man[],PetscMUMPSInt currentvalue,PetscMUMPSInt *value,PetscBool *set,PetscMUMPSInt lb,PetscMUMPSInt ub) 77 { 78 PetscErrorCode ierr; 79 PetscInt myval; 80 PetscBool myset; 81 PetscFunctionBegin; 82 /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */ 83 ierr = PetscOptionsInt_Private(PetscOptionsObject,opt,text,man,(PetscInt)currentvalue,&myval,&myset,lb,ub);CHKERRQ(ierr); 84 if (myset) {ierr = PetscMUMPSIntCast(myval,value);CHKERRQ(ierr);} 85 if (set) *set = myset; 86 PetscFunctionReturn(0); 87 } 88 #define PetscOptionsMUMPSInt(a,b,c,d,e,f) PetscOptionsMUMPSInt_Private(PetscOptionsObject,a,b,c,d,e,f,PETSC_MUMPS_INT_MIN,PETSC_MUMPS_INT_MAX) 89 90 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. 
Before/after the call, we change/restore CPUs the master ranks can run on */ 91 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 92 #define PetscMUMPS_c(mumps) \ 93 do { \ 94 if (mumps->use_petsc_omp_support) { \ 95 if (mumps->is_omp_master) { \ 96 ierr = PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl);CHKERRQ(ierr); \ 97 MUMPS_c(&mumps->id); \ 98 ierr = PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl);CHKERRQ(ierr); \ 99 } \ 100 ierr = PetscOmpCtrlBarrier(mumps->omp_ctrl);CHKERRQ(ierr); \ 101 /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \ 102 to processes, so we only Bcast info[1], an error code and leave others (since they do not have \ 103 an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \ 104 omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \ 105 */ \ 106 ierr = MPI_Bcast(mumps->id.infog, 40,MPIU_MUMPSINT, 0,mumps->omp_comm);CHKERRQ(ierr); \ 107 ierr = MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL, 0,mumps->omp_comm);CHKERRQ(ierr); \ 108 ierr = MPI_Bcast(mumps->id.info, 1, MPIU_MUMPSINT, 0,mumps->omp_comm);CHKERRQ(ierr); \ 109 } else { \ 110 MUMPS_c(&mumps->id); \ 111 } \ 112 } while(0) 113 #else 114 #define PetscMUMPS_c(mumps) \ 115 do { MUMPS_c(&mumps->id); } while (0) 116 #endif 117 118 /* declare MumpsScalar */ 119 #if defined(PETSC_USE_COMPLEX) 120 #if defined(PETSC_USE_REAL_SINGLE) 121 #define MumpsScalar mumps_complex 122 #else 123 #define MumpsScalar mumps_double_complex 124 #endif 125 #else 126 #define MumpsScalar PetscScalar 127 #endif 128 129 /* macros s.t. indices match MUMPS documentation */ 130 #define ICNTL(I) icntl[(I)-1] 131 #define CNTL(I) cntl[(I)-1] 132 #define INFOG(I) infog[(I)-1] 133 #define INFO(I) info[(I)-1] 134 #define RINFOG(I) rinfog[(I)-1] 135 #define RINFO(I) rinfo[(I)-1] 136 137 typedef struct Mat_MUMPS Mat_MUMPS; 138 struct Mat_MUMPS { 139 #if defined(PETSC_USE_COMPLEX) 140 #if defined(PETSC_USE_REAL_SINGLE) 141 CMUMPS_STRUC_C id; 142 #else 143 ZMUMPS_STRUC_C id; 144 #endif 145 #else 146 #if defined(PETSC_USE_REAL_SINGLE) 147 SMUMPS_STRUC_C id; 148 #else 149 DMUMPS_STRUC_C id; 150 #endif 151 #endif 152 153 MatStructure matstruc; 154 PetscMPIInt myid,petsc_size; 155 PetscMUMPSInt *irn,*jcn; /* the (i,j,v) triplets passed to mumps. */ 156 PetscScalar *val,*val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */ 157 PetscInt64 nnz; /* number of nonzeros. 
The type is called selective 64-bit in mumps */ 158 PetscMUMPSInt sym; 159 MPI_Comm mumps_comm; 160 PetscMUMPSInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 161 VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 162 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0) 163 PetscMUMPSInt lrhs_loc,nloc_rhs,*irhs_loc; 164 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 165 PetscInt *rhs_nrow,max_nrhs; 166 PetscMPIInt *rhs_recvcounts,*rhs_disps; 167 PetscScalar *rhs_loc,*rhs_recvbuf; 168 #endif 169 #endif 170 Vec b_seq,x_seq; 171 PetscInt ninfo,*info; /* which INFO to display */ 172 PetscInt sizeredrhs; 173 PetscScalar *schur_sol; 174 PetscInt schur_sizesol; 175 PetscMUMPSInt *ia_alloc,*ja_alloc; /* work arrays used for the CSR struct for sparse rhs */ 176 PetscInt64 cur_ilen,cur_jlen; /* current len of ia_alloc[], ja_alloc[] */ 177 PetscErrorCode (*ConvertToTriples)(Mat,PetscInt,MatReuse,Mat_MUMPS*); 178 179 /* stuff used by petsc/mumps OpenMP support*/ 180 PetscBool use_petsc_omp_support; 181 PetscOmpCtrl omp_ctrl; /* an OpenMP controler that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 182 MPI_Comm petsc_comm,omp_comm; /* petsc_comm is petsc matrix's comm */ 183 PetscInt64 *recvcount; /* a collection of nnz on omp_master */ 184 PetscMPIInt tag,omp_comm_size; 185 PetscBool is_omp_master; /* is this rank the master of omp_comm */ 186 MPI_Request *reqs; 187 }; 188 189 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt. 190 Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices. 191 */ 192 static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps,PetscInt nrow,PetscInt *ia,PetscInt *ja,PetscMUMPSInt **ia_mumps,PetscMUMPSInt **ja_mumps,PetscMUMPSInt *nnz_mumps) 193 { 194 PetscErrorCode ierr; 195 PetscInt nnz=ia[nrow]-1; /* mumps uses 1-based indices. 
Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */ 196 197 PetscFunctionBegin; 198 #if defined(PETSC_USE_64BIT_INDICES) 199 { 200 PetscInt i; 201 if (nrow+1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */ 202 ierr = PetscFree(mumps->ia_alloc);CHKERRQ(ierr); 203 ierr = PetscMalloc1(nrow+1,&mumps->ia_alloc);CHKERRQ(ierr); 204 mumps->cur_ilen = nrow+1; 205 } 206 if (nnz > mumps->cur_jlen) { 207 ierr = PetscFree(mumps->ja_alloc);CHKERRQ(ierr); 208 ierr = PetscMalloc1(nnz,&mumps->ja_alloc);CHKERRQ(ierr); 209 mumps->cur_jlen = nnz; 210 } 211 for (i=0; i<nrow+1; i++) {ierr = PetscMUMPSIntCast(ia[i],&(mumps->ia_alloc[i]));CHKERRQ(ierr);} 212 for (i=0; i<nnz; i++) {ierr = PetscMUMPSIntCast(ja[i],&(mumps->ja_alloc[i]));CHKERRQ(ierr);} 213 *ia_mumps = mumps->ia_alloc; 214 *ja_mumps = mumps->ja_alloc; 215 } 216 #else 217 *ia_mumps = ia; 218 *ja_mumps = ja; 219 #endif 220 ierr = PetscMUMPSIntCast(nnz,nnz_mumps);CHKERRQ(ierr); 221 PetscFunctionReturn(0); 222 } 223 224 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps) 225 { 226 PetscErrorCode ierr; 227 228 PetscFunctionBegin; 229 ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr); 230 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 231 ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr); 232 mumps->id.size_schur = 0; 233 mumps->id.schur_lld = 0; 234 mumps->id.ICNTL(19) = 0; 235 PetscFunctionReturn(0); 236 } 237 238 /* solve with rhs in mumps->id.redrhs and return in the same location */ 239 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 240 { 241 Mat_MUMPS *mumps=(Mat_MUMPS*)F->data; 242 Mat S,B,X; 243 MatFactorSchurStatus schurstatus; 244 PetscInt sizesol; 245 PetscErrorCode ierr; 246 247 PetscFunctionBegin; 248 ierr = MatFactorFactorizeSchurComplement(F);CHKERRQ(ierr); 249 ierr = MatFactorGetSchurComplement(F,&S,&schurstatus);CHKERRQ(ierr); 250 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);CHKERRQ(ierr); 251 ierr = MatSetType(B,((PetscObject)S)->type_name);CHKERRQ(ierr); 252 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 253 ierr = MatBindToCPU(B,S->boundtocpu);CHKERRQ(ierr); 254 #endif 255 switch (schurstatus) { 256 case MAT_FACTOR_SCHUR_FACTORED: 257 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);CHKERRQ(ierr); 258 ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr); 259 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 260 ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr); 261 #endif 262 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 263 ierr = MatMatSolveTranspose(S,B,X);CHKERRQ(ierr); 264 } else { 265 ierr = MatMatSolve(S,B,X);CHKERRQ(ierr); 266 } 267 break; 268 case MAT_FACTOR_SCHUR_INVERTED: 269 sizesol = mumps->id.nrhs*mumps->id.size_schur; 270 if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 271 ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr); 272 ierr = PetscMalloc1(sizesol,&mumps->schur_sol);CHKERRQ(ierr); 273 mumps->schur_sizesol = sizesol; 274 } 275 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);CHKERRQ(ierr); 276 ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr); 277 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 278 ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr); 279 #endif 280 ierr = MatProductCreateWithMat(S,B,NULL,X);CHKERRQ(ierr); 281 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 282 ierr = 
MatProductSetType(X,MATPRODUCT_AtB);CHKERRQ(ierr); 283 } else { 284 ierr = MatProductSetType(X,MATPRODUCT_AB);CHKERRQ(ierr); 285 } 286 ierr = MatProductSetFromOptions(X);CHKERRQ(ierr); 287 ierr = MatProductSymbolic(X);CHKERRQ(ierr); 288 ierr = MatProductNumeric(X);CHKERRQ(ierr); 289 290 ierr = MatCopy(X,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 291 break; 292 default: 293 SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %D",F->schur_status); 294 break; 295 } 296 ierr = MatFactorRestoreSchurComplement(F,&S,schurstatus);CHKERRQ(ierr); 297 ierr = MatDestroy(&B);CHKERRQ(ierr); 298 ierr = MatDestroy(&X);CHKERRQ(ierr); 299 PetscFunctionReturn(0); 300 } 301 302 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 303 { 304 Mat_MUMPS *mumps=(Mat_MUMPS*)F->data; 305 PetscErrorCode ierr; 306 307 PetscFunctionBegin; 308 if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 309 PetscFunctionReturn(0); 310 } 311 if (!expansion) { /* prepare for the condensation step */ 312 PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur; 313 /* allocate MUMPS internal array to store reduced right-hand sides */ 314 if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 315 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 316 mumps->id.lredrhs = mumps->id.size_schur; 317 ierr = PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);CHKERRQ(ierr); 318 mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs; 319 } 320 mumps->id.ICNTL(26) = 1; /* condensation phase */ 321 } else { /* prepare for the expansion step */ 322 /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */ 323 ierr = MatMumpsSolveSchur_Private(F);CHKERRQ(ierr); 324 mumps->id.ICNTL(26) = 2; /* expansion phase */ 325 PetscMUMPS_c(mumps); 326 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 327 /* restore defaults */ 328 mumps->id.ICNTL(26) = -1; 329 /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 330 if (mumps->id.nrhs > 1) { 331 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 332 mumps->id.lredrhs = 0; 333 mumps->sizeredrhs = 0; 334 } 335 } 336 PetscFunctionReturn(0); 337 } 338 339 /* 340 MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz] 341 342 input: 343 A - matrix in aij,baij or sbaij format 344 shift - 0: C style output triple; 1: Fortran style output triple. 345 reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 346 MAT_REUSE_MATRIX: only the values in v array are updated 347 output: 348 nnz - dim of r, c, and v (number of local nonzero entries of A) 349 r, c, v - row and col index, matrix values (matrix triples) 350 351 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 352 freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 353 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 
354 355 */ 356 357 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 358 { 359 const PetscScalar *av; 360 const PetscInt *ai,*aj,*ajj,M=A->rmap->n; 361 PetscInt64 nz,rnz,i,j,k; 362 PetscErrorCode ierr; 363 PetscMUMPSInt *row,*col; 364 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 365 366 PetscFunctionBegin; 367 ierr = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr); 368 mumps->val = (PetscScalar*)av; 369 if (reuse == MAT_INITIAL_MATRIX) { 370 nz = aa->nz; 371 ai = aa->i; 372 aj = aa->j; 373 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 374 for (i=k=0; i<M; i++) { 375 rnz = ai[i+1] - ai[i]; 376 ajj = aj + ai[i]; 377 for (j=0; j<rnz; j++) { 378 ierr = PetscMUMPSIntCast(i+shift,&row[k]);CHKERRQ(ierr); 379 ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[k]);CHKERRQ(ierr); 380 k++; 381 } 382 } 383 mumps->irn = row; 384 mumps->jcn = col; 385 mumps->nnz = nz; 386 } 387 ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr); 388 PetscFunctionReturn(0); 389 } 390 391 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 392 { 393 PetscErrorCode ierr; 394 PetscInt64 nz,i,j,k,r; 395 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 396 PetscMUMPSInt *row,*col; 397 398 PetscFunctionBegin; 399 mumps->val = a->val; 400 if (reuse == MAT_INITIAL_MATRIX) { 401 nz = a->sliidx[a->totalslices]; 402 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 403 for (i=k=0; i<a->totalslices; i++) { 404 for (j=a->sliidx[i],r=0; j<a->sliidx[i+1]; j++,r=((r+1)&0x07)) { 405 ierr = PetscMUMPSIntCast(8*i+r+shift,&row[k++]);CHKERRQ(ierr); 406 } 407 } 408 for (i=0;i<nz;i++) {ierr = PetscMUMPSIntCast(a->colidx[i]+shift,&col[i]);CHKERRQ(ierr);} 409 mumps->irn = row; 410 mumps->jcn = col; 411 mumps->nnz = nz; 412 } 413 PetscFunctionReturn(0); 414 } 415 416 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 417 { 418 Mat_SeqBAIJ *aa=(Mat_SeqBAIJ*)A->data; 419 const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2; 420 PetscInt64 M,nz,idx=0,rnz,i,j,k,m; 421 PetscInt bs; 422 PetscErrorCode ierr; 423 PetscMUMPSInt *row,*col; 424 425 PetscFunctionBegin; 426 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 427 M = A->rmap->N/bs; 428 mumps->val = aa->a; 429 if (reuse == MAT_INITIAL_MATRIX) { 430 ai = aa->i; aj = aa->j; 431 nz = bs2*aa->nz; 432 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 433 for (i=0; i<M; i++) { 434 ajj = aj + ai[i]; 435 rnz = ai[i+1] - ai[i]; 436 for (k=0; k<rnz; k++) { 437 for (j=0; j<bs; j++) { 438 for (m=0; m<bs; m++) { 439 ierr = PetscMUMPSIntCast(i*bs + m + shift,&row[idx]);CHKERRQ(ierr); 440 ierr = PetscMUMPSIntCast(bs*ajj[k] + j + shift,&col[idx]);CHKERRQ(ierr); 441 idx++; 442 } 443 } 444 } 445 } 446 mumps->irn = row; 447 mumps->jcn = col; 448 mumps->nnz = nz; 449 } 450 PetscFunctionReturn(0); 451 } 452 453 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 454 { 455 const PetscInt *ai, *aj,*ajj; 456 PetscInt bs; 457 PetscInt64 nz,rnz,i,j,k,m; 458 PetscErrorCode ierr; 459 PetscMUMPSInt *row,*col; 460 PetscScalar *val; 461 Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)A->data; 462 const PetscInt bs2=aa->bs2,mbs=aa->mbs; 463 #if defined(PETSC_USE_COMPLEX) 464 PetscBool hermitian; 465 #endif 466 467 PetscFunctionBegin; 468 #if defined(PETSC_USE_COMPLEX) 469 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 470 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for 
Choleksy"); 471 #endif 472 ai = aa->i; 473 aj = aa->j; 474 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 475 if (reuse == MAT_INITIAL_MATRIX) { 476 nz = aa->nz; 477 ierr = PetscMalloc2(bs2*nz,&row,bs2*nz,&col);CHKERRQ(ierr); 478 if (bs>1) { 479 ierr = PetscMalloc1(bs2*nz,&mumps->val_alloc);CHKERRQ(ierr); 480 mumps->val = mumps->val_alloc; 481 } else { 482 mumps->val = aa->a; 483 } 484 mumps->irn = row; 485 mumps->jcn = col; 486 } else { 487 if (bs == 1) mumps->val = aa->a; 488 row = mumps->irn; 489 col = mumps->jcn; 490 } 491 val = mumps->val; 492 493 nz = 0; 494 if (bs>1) { 495 for (i=0; i<mbs; i++) { 496 rnz = ai[i+1] - ai[i]; 497 ajj = aj + ai[i]; 498 for (j=0; j<rnz; j++) { 499 for (k=0; k<bs; k++) { 500 for (m=0; m<bs; m++) { 501 if (ajj[j]>i || k>=m) { 502 if (reuse == MAT_INITIAL_MATRIX) { 503 ierr = PetscMUMPSIntCast(i*bs + m + shift,&row[nz]);CHKERRQ(ierr); 504 ierr = PetscMUMPSIntCast(ajj[j]*bs + k + shift,&col[nz]);CHKERRQ(ierr); 505 } 506 val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs]; 507 } 508 } 509 } 510 } 511 } 512 } else if (reuse == MAT_INITIAL_MATRIX) { 513 for (i=0; i<mbs; i++) { 514 rnz = ai[i+1] - ai[i]; 515 ajj = aj + ai[i]; 516 for (j=0; j<rnz; j++) { 517 ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr); 518 ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr); 519 nz++; 520 } 521 } 522 if (nz != aa->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %D != %D",nz,aa->nz); 523 } 524 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 525 PetscFunctionReturn(0); 526 } 527 528 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 529 { 530 const PetscInt *ai,*aj,*ajj,*adiag,M=A->rmap->n; 531 PetscInt64 nz,rnz,i,j; 532 const PetscScalar *av,*v1; 533 PetscScalar *val; 534 PetscErrorCode ierr; 535 PetscMUMPSInt *row,*col; 536 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 537 PetscBool missing; 538 #if defined(PETSC_USE_COMPLEX) 539 PetscBool hermitian; 540 #endif 541 542 PetscFunctionBegin; 543 #if defined(PETSC_USE_COMPLEX) 544 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 545 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 546 #endif 547 ierr = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr); 548 ai = aa->i; aj = aa->j; 549 adiag = aa->diag; 550 ierr = MatMissingDiagonal_SeqAIJ(A,&missing,NULL);CHKERRQ(ierr); 551 if (reuse == MAT_INITIAL_MATRIX) { 552 /* count nz in the upper triangular part of A */ 553 nz = 0; 554 if (missing) { 555 for (i=0; i<M; i++) { 556 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 557 for (j=ai[i];j<ai[i+1];j++) { 558 if (aj[j] < i) continue; 559 nz++; 560 } 561 } else { 562 nz += ai[i+1] - adiag[i]; 563 } 564 } 565 } else { 566 for (i=0; i<M; i++) nz += ai[i+1] - adiag[i]; 567 } 568 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 569 ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr); 570 mumps->nnz = nz; 571 mumps->irn = row; 572 mumps->jcn = col; 573 mumps->val = mumps->val_alloc = val; 574 575 nz = 0; 576 if (missing) { 577 for (i=0; i<M; i++) { 578 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 579 for (j=ai[i];j<ai[i+1];j++) { 580 if (aj[j] < i) continue; 581 ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr); 582 ierr = PetscMUMPSIntCast(aj[j]+shift,&col[nz]);CHKERRQ(ierr); 583 val[nz] = av[j]; 584 nz++; 585 } 586 } else { 587 rnz = ai[i+1] - adiag[i]; 588 ajj = aj + adiag[i]; 589 v1 = av + adiag[i]; 590 for (j=0; j<rnz; j++) { 591 ierr = 
PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr); 592 ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr); 593 val[nz++] = v1[j]; 594 } 595 } 596 } 597 } else { 598 for (i=0; i<M; i++) { 599 rnz = ai[i+1] - adiag[i]; 600 ajj = aj + adiag[i]; 601 v1 = av + adiag[i]; 602 for (j=0; j<rnz; j++) { 603 ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr); 604 ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr); 605 val[nz++] = v1[j]; 606 } 607 } 608 } 609 } else { 610 nz = 0; 611 val = mumps->val; 612 if (missing) { 613 for (i=0; i <M; i++) { 614 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 615 for (j=ai[i];j<ai[i+1];j++) { 616 if (aj[j] < i) continue; 617 val[nz++] = av[j]; 618 } 619 } else { 620 rnz = ai[i+1] - adiag[i]; 621 v1 = av + adiag[i]; 622 for (j=0; j<rnz; j++) { 623 val[nz++] = v1[j]; 624 } 625 } 626 } 627 } else { 628 for (i=0; i <M; i++) { 629 rnz = ai[i+1] - adiag[i]; 630 v1 = av + adiag[i]; 631 for (j=0; j<rnz; j++) { 632 val[nz++] = v1[j]; 633 } 634 } 635 } 636 } 637 ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr); 638 PetscFunctionReturn(0); 639 } 640 641 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 642 { 643 PetscErrorCode ierr; 644 const PetscInt *ai,*aj,*bi,*bj,*garray,*ajj,*bjj; 645 PetscInt bs; 646 PetscInt64 rstart,nz,i,j,k,m,jj,irow,countA,countB; 647 PetscMUMPSInt *row,*col; 648 const PetscScalar *av,*bv,*v1,*v2; 649 PetscScalar *val; 650 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data; 651 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)(mat->A)->data; 652 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 653 const PetscInt bs2=aa->bs2,mbs=aa->mbs; 654 #if defined(PETSC_USE_COMPLEX) 655 PetscBool hermitian; 656 #endif 657 658 PetscFunctionBegin; 659 #if defined(PETSC_USE_COMPLEX) 660 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 661 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 662 #endif 663 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 664 rstart = A->rmap->rstart; 665 ai = aa->i; 666 aj = aa->j; 667 bi = bb->i; 668 bj = bb->j; 669 av = aa->a; 670 bv = bb->a; 671 672 garray = mat->garray; 673 674 if (reuse == MAT_INITIAL_MATRIX) { 675 nz = (aa->nz+bb->nz)*bs2; /* just a conservative estimate */ 676 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 677 ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr); 678 /* can not decide the exact mumps->nnz now because of the SBAIJ */ 679 mumps->irn = row; 680 mumps->jcn = col; 681 mumps->val = mumps->val_alloc = val; 682 } else { 683 val = mumps->val; 684 } 685 686 jj = 0; irow = rstart; 687 for (i=0; i<mbs; i++) { 688 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 689 countA = ai[i+1] - ai[i]; 690 countB = bi[i+1] - bi[i]; 691 bjj = bj + bi[i]; 692 v1 = av + ai[i]*bs2; 693 v2 = bv + bi[i]*bs2; 694 695 if (bs>1) { 696 /* A-part */ 697 for (j=0; j<countA; j++) { 698 for (k=0; k<bs; k++) { 699 for (m=0; m<bs; m++) { 700 if (rstart + ajj[j]*bs>irow || k>=m) { 701 if (reuse == MAT_INITIAL_MATRIX) { 702 ierr = PetscMUMPSIntCast(irow + m + shift,&row[jj]);CHKERRQ(ierr); 703 ierr = PetscMUMPSIntCast(rstart + ajj[j]*bs + k + shift,&col[jj]);CHKERRQ(ierr); 704 } 705 val[jj++] = v1[j*bs2 + m + k*bs]; 706 } 707 } 708 } 709 } 710 711 /* B-part */ 712 for (j=0; j < countB; j++) { 713 for (k=0; k<bs; k++) { 714 for (m=0; m<bs; m++) { 715 if (reuse == MAT_INITIAL_MATRIX) { 716 ierr = PetscMUMPSIntCast(irow + m + shift,&row[jj]);CHKERRQ(ierr); 717 ierr = 
PetscMUMPSIntCast(garray[bjj[j]]*bs + k + shift,&col[jj]);CHKERRQ(ierr); 718 } 719 val[jj++] = v2[j*bs2 + m + k*bs]; 720 } 721 } 722 } 723 } else { 724 /* A-part */ 725 for (j=0; j<countA; j++) { 726 if (reuse == MAT_INITIAL_MATRIX) { 727 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 728 ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr); 729 } 730 val[jj++] = v1[j]; 731 } 732 733 /* B-part */ 734 for (j=0; j < countB; j++) { 735 if (reuse == MAT_INITIAL_MATRIX) { 736 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 737 ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr); 738 } 739 val[jj++] = v2[j]; 740 } 741 } 742 irow+=bs; 743 } 744 mumps->nnz = jj; 745 PetscFunctionReturn(0); 746 } 747 748 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 749 { 750 const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 751 PetscErrorCode ierr; 752 PetscInt64 rstart,nz,i,j,jj,irow,countA,countB; 753 PetscMUMPSInt *row,*col; 754 const PetscScalar *av, *bv,*v1,*v2; 755 PetscScalar *val; 756 Mat Ad,Ao; 757 Mat_SeqAIJ *aa; 758 Mat_SeqAIJ *bb; 759 760 PetscFunctionBegin; 761 ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr); 762 ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr); 763 ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr); 764 765 aa = (Mat_SeqAIJ*)(Ad)->data; 766 bb = (Mat_SeqAIJ*)(Ao)->data; 767 ai = aa->i; 768 aj = aa->j; 769 bi = bb->i; 770 bj = bb->j; 771 772 rstart = A->rmap->rstart; 773 774 if (reuse == MAT_INITIAL_MATRIX) { 775 nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */ 776 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 777 ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr); 778 mumps->nnz = nz; 779 mumps->irn = row; 780 mumps->jcn = col; 781 mumps->val = mumps->val_alloc = val; 782 } else { 783 val = mumps->val; 784 } 785 786 jj = 0; irow = rstart; 787 for (i=0; i<m; i++) { 788 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 789 countA = ai[i+1] - ai[i]; 790 countB = bi[i+1] - bi[i]; 791 bjj = bj + bi[i]; 792 v1 = av + ai[i]; 793 v2 = bv + bi[i]; 794 795 /* A-part */ 796 for (j=0; j<countA; j++) { 797 if (reuse == MAT_INITIAL_MATRIX) { 798 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 799 ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr); 800 } 801 val[jj++] = v1[j]; 802 } 803 804 /* B-part */ 805 for (j=0; j < countB; j++) { 806 if (reuse == MAT_INITIAL_MATRIX) { 807 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 808 ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr); 809 } 810 val[jj++] = v2[j]; 811 } 812 irow++; 813 } 814 ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr); 815 ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr); 816 PetscFunctionReturn(0); 817 } 818 819 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 820 { 821 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)A->data; 822 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ*)(mat->A)->data; 823 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 824 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj; 825 const PetscInt *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart; 826 const PetscInt bs2=mat->bs2; 827 PetscErrorCode ierr; 828 PetscInt bs; 829 PetscInt64 nz,i,j,k,n,jj,irow,countA,countB,idx; 830 PetscMUMPSInt *row,*col; 831 const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2; 832 PetscScalar *val; 833 
834 PetscFunctionBegin; 835 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 836 if (reuse == MAT_INITIAL_MATRIX) { 837 nz = bs2*(aa->nz + bb->nz); 838 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 839 ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr); 840 mumps->nnz = nz; 841 mumps->irn = row; 842 mumps->jcn = col; 843 mumps->val = mumps->val_alloc = val; 844 } else { 845 val = mumps->val; 846 } 847 848 jj = 0; irow = rstart; 849 for (i=0; i<mbs; i++) { 850 countA = ai[i+1] - ai[i]; 851 countB = bi[i+1] - bi[i]; 852 ajj = aj + ai[i]; 853 bjj = bj + bi[i]; 854 v1 = av + bs2*ai[i]; 855 v2 = bv + bs2*bi[i]; 856 857 idx = 0; 858 /* A-part */ 859 for (k=0; k<countA; k++) { 860 for (j=0; j<bs; j++) { 861 for (n=0; n<bs; n++) { 862 if (reuse == MAT_INITIAL_MATRIX) { 863 ierr = PetscMUMPSIntCast(irow + n + shift,&row[jj]);CHKERRQ(ierr); 864 ierr = PetscMUMPSIntCast(rstart + bs*ajj[k] + j + shift,&col[jj]);CHKERRQ(ierr); 865 } 866 val[jj++] = v1[idx++]; 867 } 868 } 869 } 870 871 idx = 0; 872 /* B-part */ 873 for (k=0; k<countB; k++) { 874 for (j=0; j<bs; j++) { 875 for (n=0; n<bs; n++) { 876 if (reuse == MAT_INITIAL_MATRIX) { 877 ierr = PetscMUMPSIntCast(irow + n + shift,&row[jj]);CHKERRQ(ierr); 878 ierr = PetscMUMPSIntCast(bs*garray[bjj[k]] + j + shift,&col[jj]);CHKERRQ(ierr); 879 } 880 val[jj++] = v2[idx++]; 881 } 882 } 883 } 884 irow += bs; 885 } 886 PetscFunctionReturn(0); 887 } 888 889 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 890 { 891 const PetscInt *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 892 PetscErrorCode ierr; 893 PetscInt64 rstart,nz,nza,nzb,i,j,jj,irow,countA,countB; 894 PetscMUMPSInt *row,*col; 895 const PetscScalar *av, *bv,*v1,*v2; 896 PetscScalar *val; 897 Mat Ad,Ao; 898 Mat_SeqAIJ *aa; 899 Mat_SeqAIJ *bb; 900 #if defined(PETSC_USE_COMPLEX) 901 PetscBool hermitian; 902 #endif 903 904 PetscFunctionBegin; 905 #if defined(PETSC_USE_COMPLEX) 906 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 907 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 908 #endif 909 ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr); 910 ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr); 911 ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr); 912 913 aa = (Mat_SeqAIJ*)(Ad)->data; 914 bb = (Mat_SeqAIJ*)(Ao)->data; 915 ai = aa->i; 916 aj = aa->j; 917 adiag = aa->diag; 918 bi = bb->i; 919 bj = bb->j; 920 921 rstart = A->rmap->rstart; 922 923 if (reuse == MAT_INITIAL_MATRIX) { 924 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 925 nzb = 0; /* num of upper triangular entries in mat->B */ 926 for (i=0; i<m; i++) { 927 nza += (ai[i+1] - adiag[i]); 928 countB = bi[i+1] - bi[i]; 929 bjj = bj + bi[i]; 930 for (j=0; j<countB; j++) { 931 if (garray[bjj[j]] > rstart) nzb++; 932 } 933 } 934 935 nz = nza + nzb; /* total nz of upper triangular part of mat */ 936 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 937 ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr); 938 mumps->nnz = nz; 939 mumps->irn = row; 940 mumps->jcn = col; 941 mumps->val = mumps->val_alloc = val; 942 } else { 943 val = mumps->val; 944 } 945 946 jj = 0; irow = rstart; 947 for (i=0; i<m; i++) { 948 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 949 v1 = av + adiag[i]; 950 countA = ai[i+1] - adiag[i]; 951 countB = bi[i+1] - bi[i]; 952 bjj = bj + bi[i]; 953 v2 = bv + bi[i]; 954 955 /* A-part */ 956 for (j=0; 
j<countA; j++) { 957 if (reuse == MAT_INITIAL_MATRIX) { 958 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 959 ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr); 960 } 961 val[jj++] = v1[j]; 962 } 963 964 /* B-part */ 965 for (j=0; j < countB; j++) { 966 if (garray[bjj[j]] > rstart) { 967 if (reuse == MAT_INITIAL_MATRIX) { 968 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 969 ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr); 970 } 971 val[jj++] = v2[j]; 972 } 973 } 974 irow++; 975 } 976 ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr); 977 ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr); 978 PetscFunctionReturn(0); 979 } 980 981 PetscErrorCode MatDestroy_MUMPS(Mat A) 982 { 983 PetscErrorCode ierr; 984 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 985 986 PetscFunctionBegin; 987 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 988 ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr); 989 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 990 ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr); 991 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 992 ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr); 993 ierr = PetscFree2(mumps->irn,mumps->jcn);CHKERRQ(ierr); 994 ierr = PetscFree(mumps->val_alloc);CHKERRQ(ierr); 995 ierr = PetscFree(mumps->info);CHKERRQ(ierr); 996 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 997 mumps->id.job = JOB_END; 998 PetscMUMPS_c(mumps); 999 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1000 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1001 if (mumps->use_petsc_omp_support) { 1002 ierr = PetscOmpCtrlDestroy(&mumps->omp_ctrl);CHKERRQ(ierr); 1003 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0) 1004 ierr = PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);CHKERRQ(ierr); 1005 ierr = PetscFree3(mumps->rhs_nrow,mumps->rhs_recvcounts,mumps->rhs_disps);CHKERRQ(ierr); 1006 #endif 1007 } 1008 #endif 1009 ierr = PetscFree(mumps->ia_alloc);CHKERRQ(ierr); 1010 ierr = PetscFree(mumps->ja_alloc);CHKERRQ(ierr); 1011 ierr = PetscFree(mumps->recvcount);CHKERRQ(ierr); 1012 ierr = PetscFree(mumps->reqs);CHKERRQ(ierr); 1013 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0) 1014 ierr = PetscFree(mumps->irhs_loc);CHKERRQ(ierr); 1015 #endif 1016 ierr = PetscFree(A->data);CHKERRQ(ierr); 1017 1018 /* clear composed functions */ 1019 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr); 1020 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr); 1021 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr); 1022 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr); 1023 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);CHKERRQ(ierr); 1024 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr); 1025 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);CHKERRQ(ierr); 1026 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);CHKERRQ(ierr); 1027 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);CHKERRQ(ierr); 1028 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);CHKERRQ(ierr); 1029 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);CHKERRQ(ierr); 1030 ierr = 
PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);CHKERRQ(ierr); 1031 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);CHKERRQ(ierr); 1032 PetscFunctionReturn(0); 1033 } 1034 1035 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0) 1036 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */ 1037 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A,PetscInt nrhs,const PetscScalar *array) 1038 { 1039 PetscErrorCode ierr; 1040 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1041 const PetscMPIInt ompsize=mumps->omp_comm_size; 1042 PetscInt i,m,M,rstart; 1043 1044 PetscFunctionBegin; 1045 ierr = MatGetSize(A,&M,NULL);CHKERRQ(ierr); 1046 ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr); 1047 if (M > PETSC_MUMPS_INT_MAX) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt"); 1048 if (ompsize == 1) { 1049 if (!mumps->irhs_loc) { 1050 mumps->nloc_rhs = m; 1051 ierr = PetscMalloc1(m,&mumps->irhs_loc);CHKERRQ(ierr); 1052 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 1053 for (i=0; i<m; i++) mumps->irhs_loc[i] = rstart+i+1; /* use 1-based indices */ 1054 } 1055 mumps->id.rhs_loc = (MumpsScalar*)array; 1056 } else { 1057 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1058 const PetscInt *ranges; 1059 PetscMPIInt j,k,sendcount,*petsc_ranks,*omp_ranks; 1060 MPI_Group petsc_group,omp_group; 1061 PetscScalar *recvbuf=NULL; 1062 1063 if (mumps->is_omp_master) { 1064 /* Lazily initialize the omp stuff for distributed rhs */ 1065 if (!mumps->irhs_loc) { 1066 ierr = PetscMalloc2(ompsize,&omp_ranks,ompsize,&petsc_ranks);CHKERRQ(ierr); 1067 ierr = PetscMalloc3(ompsize,&mumps->rhs_nrow,ompsize,&mumps->rhs_recvcounts,ompsize,&mumps->rhs_disps);CHKERRQ(ierr); 1068 ierr = MPI_Comm_group(mumps->petsc_comm,&petsc_group);CHKERRQ(ierr); 1069 ierr = MPI_Comm_group(mumps->omp_comm,&omp_group);CHKERRQ(ierr); 1070 for (j=0; j<ompsize; j++) omp_ranks[j] = j; 1071 ierr = MPI_Group_translate_ranks(omp_group,ompsize,omp_ranks,petsc_group,petsc_ranks);CHKERRQ(ierr); 1072 1073 /* Populate mumps->irhs_loc[], rhs_nrow[] */ 1074 mumps->nloc_rhs = 0; 1075 ierr = MatGetOwnershipRanges(A,&ranges);CHKERRQ(ierr); 1076 for (j=0; j<ompsize; j++) { 1077 mumps->rhs_nrow[j] = ranges[petsc_ranks[j]+1] - ranges[petsc_ranks[j]]; 1078 mumps->nloc_rhs += mumps->rhs_nrow[j]; 1079 } 1080 ierr = PetscMalloc1(mumps->nloc_rhs,&mumps->irhs_loc);CHKERRQ(ierr); 1081 for (j=k=0; j<ompsize; j++) { 1082 for (i=ranges[petsc_ranks[j]]; i<ranges[petsc_ranks[j]+1]; i++,k++) mumps->irhs_loc[k] = i+1; /* uses 1-based indices */ 1083 } 1084 1085 ierr = PetscFree2(omp_ranks,petsc_ranks);CHKERRQ(ierr); 1086 ierr = MPI_Group_free(&petsc_group);CHKERRQ(ierr); 1087 ierr = MPI_Group_free(&omp_group);CHKERRQ(ierr); 1088 } 1089 1090 /* Realloc buffers when current nrhs is bigger than what we have met */ 1091 if (nrhs > mumps->max_nrhs) { 1092 ierr = PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);CHKERRQ(ierr); 1093 ierr = PetscMalloc2(mumps->nloc_rhs*nrhs,&mumps->rhs_loc,mumps->nloc_rhs*nrhs,&mumps->rhs_recvbuf);CHKERRQ(ierr); 1094 mumps->max_nrhs = nrhs; 1095 } 1096 1097 /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */ 1098 for (j=0; j<ompsize; j++) {ierr = PetscMPIIntCast(mumps->rhs_nrow[j]*nrhs,&mumps->rhs_recvcounts[j]);CHKERRQ(ierr);} 1099 mumps->rhs_disps[0] = 0; 1100 for (j=1; j<ompsize; j++) { 1101 mumps->rhs_disps[j] = mumps->rhs_disps[j-1] + mumps->rhs_recvcounts[j-1]; 1102 if 
(mumps->rhs_disps[j] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscMPIInt overflow!"); 1103 } 1104 recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */ 1105 } 1106 1107 ierr = PetscMPIIntCast(m*nrhs,&sendcount);CHKERRQ(ierr); 1108 ierr = MPI_Gatherv(array,sendcount,MPIU_SCALAR,recvbuf,mumps->rhs_recvcounts,mumps->rhs_disps,MPIU_SCALAR,0,mumps->omp_comm);CHKERRQ(ierr); 1109 1110 if (mumps->is_omp_master) { 1111 if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */ 1112 PetscScalar *dst,*dstbase = mumps->rhs_loc; 1113 for (j=0; j<ompsize; j++) { 1114 const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j]; 1115 dst = dstbase; 1116 for (i=0; i<nrhs; i++) { 1117 ierr = PetscArraycpy(dst,src,mumps->rhs_nrow[j]);CHKERRQ(ierr); 1118 src += mumps->rhs_nrow[j]; 1119 dst += mumps->nloc_rhs; 1120 } 1121 dstbase += mumps->rhs_nrow[j]; 1122 } 1123 } 1124 mumps->id.rhs_loc = (MumpsScalar*)mumps->rhs_loc; 1125 } 1126 #endif /* PETSC_HAVE_OPENMP_SUPPORT */ 1127 } 1128 mumps->id.nrhs = nrhs; 1129 mumps->id.nloc_rhs = mumps->nloc_rhs; 1130 mumps->id.lrhs_loc = mumps->nloc_rhs; 1131 mumps->id.irhs_loc = mumps->irhs_loc; 1132 PetscFunctionReturn(0); 1133 } 1134 #endif /* PETSC_PKG_MUMPS_VERSION_GE(5,3,0) */ 1135 1136 PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x) 1137 { 1138 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1139 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0) 1140 const PetscScalar *rarray; 1141 #endif 1142 PetscScalar *array; 1143 IS is_iden,is_petsc; 1144 PetscErrorCode ierr; 1145 PetscInt i; 1146 PetscBool second_solve = PETSC_FALSE; 1147 static PetscBool cite1 = PETSC_FALSE,cite2 = PETSC_FALSE; 1148 1149 PetscFunctionBegin; 1150 ierr = PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1);CHKERRQ(ierr); 1151 ierr = PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. 
Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",&cite2);CHKERRQ(ierr); 1152 1153 if (A->factorerrortype) { 1154 ierr = PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1155 ierr = VecSetInf(x);CHKERRQ(ierr); 1156 PetscFunctionReturn(0); 1157 } 1158 1159 mumps->id.nrhs = 1; 1160 if (mumps->petsc_size > 1) { 1161 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0) 1162 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1163 ierr = VecGetArrayRead(b,&rarray);CHKERRQ(ierr); 1164 ierr = MatMumpsSetUpDistRHSInfo(A,1,rarray);CHKERRQ(ierr); 1165 #else 1166 mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a seqential rhs vector*/ 1167 ierr = VecScatterBegin(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1168 ierr = VecScatterEnd(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1169 if (!mumps->myid) { 1170 ierr = VecGetArray(mumps->b_seq,&array);CHKERRQ(ierr); 1171 mumps->id.rhs = (MumpsScalar*)array; 1172 } 1173 #endif 1174 } else { /* petsc_size == 1 */ 1175 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1176 ierr = VecCopy(b,x);CHKERRQ(ierr); 1177 ierr = VecGetArray(x,&array);CHKERRQ(ierr); 1178 mumps->id.rhs = (MumpsScalar*)array; 1179 } 1180 1181 /* 1182 handle condensation step of Schur complement (if any) 1183 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 1184 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 1185 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 
1186 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 1187 */ 1188 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1189 if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n"); 1190 second_solve = PETSC_TRUE; 1191 ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr); 1192 } 1193 /* solve phase */ 1194 /*-------------*/ 1195 mumps->id.job = JOB_SOLVE; 1196 PetscMUMPS_c(mumps); 1197 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1198 1199 /* handle expansion step of Schur complement (if any) */ 1200 if (second_solve) { 1201 ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr); 1202 } 1203 1204 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 1205 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 1206 /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 1207 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 1208 } 1209 if (!mumps->scat_sol) { /* create scatter scat_sol */ 1210 PetscInt *isol2_loc=NULL; 1211 ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */ 1212 ierr = PetscMalloc1(mumps->id.lsol_loc,&isol2_loc);CHKERRQ(ierr); 1213 for (i=0; i<mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i]-1; /* change Fortran style to C style */ 1214 ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,isol2_loc,PETSC_OWN_POINTER,&is_petsc);CHKERRQ(ierr); /* to */ 1215 ierr = VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr); 1216 ierr = ISDestroy(&is_iden);CHKERRQ(ierr); 1217 ierr = ISDestroy(&is_petsc);CHKERRQ(ierr); 1218 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 1219 } 1220 1221 ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1222 ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1223 } 1224 1225 if (mumps->petsc_size > 1) { 1226 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0) 1227 ierr = VecRestoreArrayRead(b,&rarray);CHKERRQ(ierr); 1228 #else 1229 if (!mumps->myid) {ierr = VecRestoreArray(mumps->b_seq,&array);CHKERRQ(ierr);} 1230 #endif 1231 } else {ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);} 1232 1233 ierr = PetscLogFlops(2.0*mumps->id.RINFO(3));CHKERRQ(ierr); 1234 PetscFunctionReturn(0); 1235 } 1236 1237 PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x) 1238 { 1239 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1240 PetscErrorCode ierr; 1241 1242 PetscFunctionBegin; 1243 mumps->id.ICNTL(9) = 0; 1244 ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr); 1245 mumps->id.ICNTL(9) = 1; 1246 PetscFunctionReturn(0); 1247 } 1248 1249 PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X) 1250 { 1251 PetscErrorCode ierr; 1252 Mat Bt = NULL; 1253 PetscBool denseX,denseB,flg,flgT; 1254 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1255 PetscInt i,nrhs,M; 1256 PetscScalar *array; 1257 const PetscScalar *rbray; 1258 PetscInt lsol_loc,nlsol_loc,*idxx,iidx = 0; 1259 PetscMUMPSInt *isol_loc,*isol_loc_save; 1260 PetscScalar *bray,*sol_loc,*sol_loc_save; 1261 IS is_to,is_from; 1262 PetscInt k,proc,j,m,myrstart; 1263 const PetscInt *rstart; 1264 Vec v_mpi,msol_loc; 1265 VecScatter scat_sol; 1266 #if 
PETSC_PKG_MUMPS_VERSION_LT(5,3,0) 1267 Vec b_seq; 1268 VecScatter scat_rhs; 1269 #endif 1270 PetscScalar *aa; 1271 PetscInt spnr,*ia,*ja; 1272 Mat_MPIAIJ *b = NULL; 1273 1274 PetscFunctionBegin; 1275 ierr = PetscObjectTypeCompareAny((PetscObject)X,&denseX,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 1276 if (!denseX) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix"); 1277 1278 ierr = PetscObjectTypeCompareAny((PetscObject)B,&denseB,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 1279 if (denseB) { 1280 if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution"); 1281 mumps->id.ICNTL(20)= 0; /* dense RHS */ 1282 } else { /* sparse B */ 1283 if (X == B) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_IDN,"X and B must be different matrices"); 1284 ierr = PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);CHKERRQ(ierr); 1285 if (flgT) { /* input B is transpose of actural RHS matrix, 1286 because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */ 1287 ierr = MatTransposeGetMat(B,&Bt);CHKERRQ(ierr); 1288 } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix"); 1289 mumps->id.ICNTL(20)= 1; /* sparse RHS */ 1290 } 1291 1292 ierr = MatGetSize(B,&M,&nrhs);CHKERRQ(ierr); 1293 mumps->id.nrhs = nrhs; 1294 mumps->id.lrhs = M; 1295 mumps->id.rhs = NULL; 1296 1297 if (mumps->petsc_size == 1) { 1298 PetscScalar *aa; 1299 PetscInt spnr,*ia,*ja; 1300 PetscBool second_solve = PETSC_FALSE; 1301 1302 ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr); 1303 mumps->id.rhs = (MumpsScalar*)array; 1304 1305 if (denseB) { 1306 /* copy B to X */ 1307 ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr); 1308 ierr = PetscArraycpy(array,rbray,M*nrhs);CHKERRQ(ierr); 1309 ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr); 1310 } else { /* sparse B */ 1311 ierr = MatSeqAIJGetArray(Bt,&aa);CHKERRQ(ierr); 1312 ierr = MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1313 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 1314 ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr); 1315 mumps->id.rhs_sparse = (MumpsScalar*)aa; 1316 } 1317 /* handle condensation step of Schur complement (if any) */ 1318 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1319 second_solve = PETSC_TRUE; 1320 ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr); 1321 } 1322 /* solve phase */ 1323 /*-------------*/ 1324 mumps->id.job = JOB_SOLVE; 1325 PetscMUMPS_c(mumps); 1326 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1327 1328 /* handle expansion step of Schur complement (if any) */ 1329 if (second_solve) { 1330 ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr); 1331 } 1332 if (!denseB) { /* sparse B */ 1333 ierr = MatSeqAIJRestoreArray(Bt,&aa);CHKERRQ(ierr); 1334 ierr = MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1335 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 1336 } 1337 ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr); 1338 PetscFunctionReturn(0); 1339 } 1340 1341 /*--------- parallel case: MUMPS 
requires rhs B to be centralized on the host! --------*/ 1342 if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n"); 1343 1344 /* create msol_loc to hold mumps local solution */ 1345 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1346 sol_loc_save = (PetscScalar*)mumps->id.sol_loc; 1347 1348 lsol_loc = mumps->id.lsol_loc; 1349 nlsol_loc = nrhs*lsol_loc; /* length of sol_loc */ 1350 ierr = PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);CHKERRQ(ierr); 1351 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1352 mumps->id.isol_loc = isol_loc; 1353 1354 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);CHKERRQ(ierr); 1355 1356 if (denseB) { 1357 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0) 1358 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1359 ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr); 1360 ierr = MatMumpsSetUpDistRHSInfo(A,nrhs,rbray);CHKERRQ(ierr); 1361 ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr); 1362 ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr); 1363 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,NULL,&v_mpi);CHKERRQ(ierr); 1364 #else 1365 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1366 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1367 0, re-arrange B into desired order, which is a local operation. 1368 */ 1369 1370 /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 1371 /* wrap dense rhs matrix B into a vector v_mpi */ 1372 ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr); 1373 ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr); 1374 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr); 1375 ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr); 1376 1377 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! 
*/ 1378 if (!mumps->myid) { 1379 PetscInt *idx; 1380 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1381 ierr = PetscMalloc1(nrhs*M,&idx);CHKERRQ(ierr); 1382 ierr = MatGetOwnershipRanges(B,&rstart);CHKERRQ(ierr); 1383 k = 0; 1384 for (proc=0; proc<mumps->petsc_size; proc++){ 1385 for (j=0; j<nrhs; j++){ 1386 for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i; 1387 } 1388 } 1389 1390 ierr = VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);CHKERRQ(ierr); 1391 ierr = ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr); 1392 ierr = ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);CHKERRQ(ierr); 1393 } else { 1394 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);CHKERRQ(ierr); 1395 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);CHKERRQ(ierr); 1396 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);CHKERRQ(ierr); 1397 } 1398 ierr = VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);CHKERRQ(ierr); 1399 ierr = VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1400 ierr = ISDestroy(&is_to);CHKERRQ(ierr); 1401 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1402 ierr = VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1403 1404 if (!mumps->myid) { /* define rhs on the host */ 1405 ierr = VecGetArray(b_seq,&bray);CHKERRQ(ierr); 1406 mumps->id.rhs = (MumpsScalar*)bray; 1407 ierr = VecRestoreArray(b_seq,&bray);CHKERRQ(ierr); 1408 } 1409 #endif 1410 } else { /* sparse B */ 1411 b = (Mat_MPIAIJ*)Bt->data; 1412 1413 /* wrap dense X into a vector v_mpi */ 1414 ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr); 1415 ierr = MatDenseGetArray(X,&bray);CHKERRQ(ierr); 1416 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr); 1417 ierr = MatDenseRestoreArray(X,&bray);CHKERRQ(ierr); 1418 1419 if (!mumps->myid) { 1420 ierr = MatSeqAIJGetArray(b->A,&aa);CHKERRQ(ierr); 1421 ierr = MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1422 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 1423 ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr); 1424 mumps->id.rhs_sparse = (MumpsScalar*)aa; 1425 } else { 1426 mumps->id.irhs_ptr = NULL; 1427 mumps->id.irhs_sparse = NULL; 1428 mumps->id.nz_rhs = 0; 1429 mumps->id.rhs_sparse = NULL; 1430 } 1431 } 1432 1433 /* solve phase */ 1434 /*-------------*/ 1435 mumps->id.job = JOB_SOLVE; 1436 PetscMUMPS_c(mumps); 1437 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1438 1439 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1440 ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr); 1441 ierr = VecPlaceArray(v_mpi,array);CHKERRQ(ierr); 1442 1443 /* create scatter scat_sol */ 1444 ierr = MatGetOwnershipRanges(X,&rstart);CHKERRQ(ierr); 1445 /* iidx: index for scatter mumps solution to petsc X */ 1446 1447 ierr = ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);CHKERRQ(ierr); 1448 ierr = PetscMalloc1(nlsol_loc,&idxx);CHKERRQ(ierr); 1449 for (i=0; i<lsol_loc; i++) { 1450 isol_loc[i] -= 1; /* change Fortran style to C style. 
isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1451 1452 for (proc=0; proc<mumps->petsc_size; proc++){ 1453 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) { 1454 myrstart = rstart[proc]; 1455 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1456 iidx = k + myrstart*nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1457 m = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */ 1458 break; 1459 } 1460 } 1461 1462 for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m; 1463 } 1464 ierr = ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr); 1465 ierr = VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);CHKERRQ(ierr); 1466 ierr = VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1467 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1468 ierr = ISDestroy(&is_to);CHKERRQ(ierr); 1469 ierr = VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1470 ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr); 1471 1472 /* free spaces */ 1473 mumps->id.sol_loc = (MumpsScalar*)sol_loc_save; 1474 mumps->id.isol_loc = isol_loc_save; 1475 1476 ierr = PetscFree2(sol_loc,isol_loc);CHKERRQ(ierr); 1477 ierr = PetscFree(idxx);CHKERRQ(ierr); 1478 ierr = VecDestroy(&msol_loc);CHKERRQ(ierr); 1479 ierr = VecDestroy(&v_mpi);CHKERRQ(ierr); 1480 if (!denseB) { 1481 if (!mumps->myid) { 1482 b = (Mat_MPIAIJ*)Bt->data; 1483 ierr = MatSeqAIJRestoreArray(b->A,&aa);CHKERRQ(ierr); 1484 ierr = MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1485 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 1486 } 1487 } else { 1488 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0) 1489 ierr = VecDestroy(&b_seq);CHKERRQ(ierr); 1490 ierr = VecScatterDestroy(&scat_rhs);CHKERRQ(ierr); 1491 #endif 1492 } 1493 ierr = VecScatterDestroy(&scat_sol);CHKERRQ(ierr); 1494 ierr = PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));CHKERRQ(ierr); 1495 PetscFunctionReturn(0); 1496 } 1497 1498 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X) 1499 { 1500 PetscErrorCode ierr; 1501 PetscBool flg; 1502 Mat B; 1503 1504 PetscFunctionBegin; 1505 ierr = PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 1506 if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix"); 1507 1508 /* Create B=Bt^T that uses Bt's data structure */ 1509 ierr = MatCreateTranspose(Bt,&B);CHKERRQ(ierr); 1510 1511 ierr = MatMatSolve_MUMPS(A,B,X);CHKERRQ(ierr); 1512 ierr = MatDestroy(&B);CHKERRQ(ierr); 1513 PetscFunctionReturn(0); 1514 } 1515 1516 #if !defined(PETSC_USE_COMPLEX) 1517 /* 1518 input: 1519 F: numeric factor 1520 output: 1521 nneg: total number of negative pivots 1522 nzero: total number of zero pivots 1523 npos: (global dimension of F) - nneg - nzero 1524 */ 1525 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos) 1526 { 1527 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1528 PetscErrorCode ierr; 1529 PetscMPIInt size; 1530 1531 PetscFunctionBegin; 1532 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRQ(ierr); 1533 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. 
   Set ICNTL(13)=1 to skip ScaLAPACK */
  if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set to 1 for correct global matrix inertia\n",mumps->id.ICNTL(13));

  if (nneg) *nneg = mumps->id.INFOG(12);
  if (nzero || npos) {
    if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set to 1 for null pivot row detection");
    if (nzero) *nzero = mumps->id.INFOG(28);
    if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
  }
  PetscFunctionReturn(0);
}
#endif

PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps)
{
  PetscErrorCode ierr;
  PetscInt       i,nreqs;
  PetscMUMPSInt  *irn,*jcn;
  PetscMPIInt    count;
  PetscInt64     totnnz,remain;
  const PetscInt osize=mumps->omp_comm_size;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (osize > 1) {
    if (reuse == MAT_INITIAL_MATRIX) {
      /* master first gathers counts of nonzeros to receive */
      if (mumps->is_omp_master) {ierr = PetscMalloc1(osize,&mumps->recvcount);CHKERRQ(ierr);}
      ierr = MPI_Gather(&mumps->nnz,1,MPIU_INT64,mumps->recvcount,1,MPIU_INT64,0/*master*/,mumps->omp_comm);CHKERRQ(ierr);

      /* Then each rank computes the number of sends/recvs it is involved in */
      if (mumps->is_omp_master) {
        /* Start from 1 since self communication is not done in MPI */
        nreqs = 0;
        for (i=1; i<osize; i++) nreqs += (mumps->recvcount[i]+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
      } else {
        nreqs = (mumps->nnz+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
      }
      ierr = PetscMalloc1(nreqs*3,&mumps->reqs);CHKERRQ(ierr); /* Triple the requests since we send irn, jcn and val separately */

      /* The following code does a very simple thing: the omp_master rank gathers irn/jcn/val from the others.
         MPI_Gatherv would be enough if it supported big counts > 2^31-1. Since it does not, and mumps->nnz
         might be a prime number > 2^31-1, we have to slice the messages. Note omp_comm_size
         is very small, so the current approach should have no extra overhead compared to MPI_Gatherv.
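         For example, a non-master rank holding nnz = 5e9 triplet entries sends each of irn/jcn/val in
         ceil(5e9/(2^31-1)) = 3 slices, i.e. 9 point-to-point messages, which the master matches with 9 receives.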
1577 */ 1578 nreqs = 0; /* counter for actual send/recvs */ 1579 if (mumps->is_omp_master) { 1580 for (i=0,totnnz=0; i<osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1581 ierr = PetscMalloc2(totnnz,&irn,totnnz,&jcn);CHKERRQ(ierr); 1582 ierr = PetscMalloc1(totnnz,&val);CHKERRQ(ierr); 1583 1584 /* Self communication */ 1585 ierr = PetscArraycpy(irn,mumps->irn,mumps->nnz);CHKERRQ(ierr); 1586 ierr = PetscArraycpy(jcn,mumps->jcn,mumps->nnz);CHKERRQ(ierr); 1587 ierr = PetscArraycpy(val,mumps->val,mumps->nnz);CHKERRQ(ierr); 1588 1589 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1590 ierr = PetscFree2(mumps->irn,mumps->jcn);CHKERRQ(ierr); 1591 ierr = PetscFree(mumps->val_alloc);CHKERRQ(ierr); 1592 mumps->nnz = totnnz; 1593 mumps->irn = irn; 1594 mumps->jcn = jcn; 1595 mumps->val = mumps->val_alloc = val; 1596 1597 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1598 jcn += mumps->recvcount[0]; 1599 val += mumps->recvcount[0]; 1600 1601 /* Remote communication */ 1602 for (i=1; i<osize; i++) { 1603 count = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX); 1604 remain = mumps->recvcount[i] - count; 1605 while (count>0) { 1606 ierr = MPI_Irecv(irn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr); 1607 ierr = MPI_Irecv(jcn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr); 1608 ierr = MPI_Irecv(val,count,MPIU_SCALAR, i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr); 1609 irn += count; 1610 jcn += count; 1611 val += count; 1612 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1613 remain -= count; 1614 } 1615 } 1616 } else { 1617 irn = mumps->irn; 1618 jcn = mumps->jcn; 1619 val = mumps->val; 1620 count = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX); 1621 remain = mumps->nnz - count; 1622 while (count>0) { 1623 ierr = MPI_Isend(irn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr); 1624 ierr = MPI_Isend(jcn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr); 1625 ierr = MPI_Isend(val,count,MPIU_SCALAR, 0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr); 1626 irn += count; 1627 jcn += count; 1628 val += count; 1629 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1630 remain -= count; 1631 } 1632 } 1633 } else { 1634 nreqs = 0; 1635 if (mumps->is_omp_master) { 1636 val = mumps->val + mumps->recvcount[0]; 1637 for (i=1; i<osize; i++) { /* Remote communication only since self data is already in place */ 1638 count = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX); 1639 remain = mumps->recvcount[i] - count; 1640 while (count>0) { 1641 ierr = MPI_Irecv(val,count,MPIU_SCALAR,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr); 1642 val += count; 1643 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1644 remain -= count; 1645 } 1646 } 1647 } else { 1648 val = mumps->val; 1649 count = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX); 1650 remain = mumps->nnz - count; 1651 while (count>0) { 1652 ierr = MPI_Isend(val,count,MPIU_SCALAR,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr); 1653 val += count; 1654 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1655 remain -= count; 1656 } 1657 } 1658 } 1659 ierr = MPI_Waitall(nreqs,mumps->reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 1660 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1661 } 1662 PetscFunctionReturn(0); 1663 } 1664 1665 PetscErrorCode 
MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat_MUMPS      *mumps =(Mat_MUMPS*)(F)->data;
  PetscErrorCode ierr;
  PetscBool      isMPIAIJ;

  PetscFunctionBegin;
  if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) { /* INFOG(1)=-16 with INFO(2)=N=0 means the analysis only saw an empty matrix; do not bail out in that case */
    if (mumps->id.INFOG(1) == -6) {
      ierr = PetscInfo2(A,"MatFactorNumeric is called with a structurally singular matrix, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    }
    ierr = PetscInfo2(A,"MatFactorNumeric is called after the analysis phase failed, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps);CHKERRQ(ierr);
  ierr = MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);CHKERRQ(ierr);

  /* numerical factorization phase */
  /*-------------------------------*/
  mumps->id.job = JOB_FACTNUMERIC;
  if (!mumps->id.ICNTL(18)) { /* A is centralized */
    if (!mumps->myid) {
      mumps->id.a = (MumpsScalar*)mumps->val;
    }
  } else {
    mumps->id.a_loc = (MumpsScalar*)mumps->val;
  }
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) {
    if (A->erroriffailure) {
      SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
    } else {
      if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
        ierr = PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
      } else if (mumps->id.INFOG(1) == -13) {
        ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_OUTMEMORY;
      } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
        ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with work array\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_OUTMEMORY;
      } else {
        ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_OTHER;
      }
    }
  }
  if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d\n",mumps->id.ICNTL(16));

  F->assembled = PETSC_TRUE;
  mumps->matstruc = SAME_NONZERO_PATTERN;
  if (F->schur) { /* reset Schur status to unfactored */
#if defined(PETSC_HAVE_CUDA)
    F->schur->offloadmask = PETSC_OFFLOAD_CPU;
#endif
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
      mumps->id.ICNTL(19) = 2;
      ierr = MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);CHKERRQ(ierr);
    }
    ierr = MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);CHKERRQ(ierr);
  }

  /* just to be sure that the ICNTL(19) value returned by a call to MatMumpsGetIcntl is always consistent */
  if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;

  if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
  if (mumps->petsc_size > 1) {
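    /* With petsc_size > 1 the solution comes back distributed from MUMPS (ICNTL(21)=1 is set in
       PetscInitializeMUMPS): INFO(23) is the number of solution entries owned by this rank, so allocate
       sol_loc/isol_loc of that length and wrap sol_loc in the sequential work vector x_seq, which
       MatSolve_MUMPS() reuses for every solve. */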
PetscInt lsol_loc; 1734 PetscScalar *sol_loc; 1735 1736 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr); 1737 1738 /* distributed solution; Create x_seq=sol_loc for repeated use */ 1739 if (mumps->x_seq) { 1740 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 1741 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 1742 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 1743 } 1744 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1745 ierr = PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);CHKERRQ(ierr); 1746 mumps->id.lsol_loc = lsol_loc; 1747 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1748 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr); 1749 } 1750 ierr = PetscLogFlops(mumps->id.RINFO(2));CHKERRQ(ierr); 1751 PetscFunctionReturn(0); 1752 } 1753 1754 /* Sets MUMPS options from the options database */ 1755 PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A) 1756 { 1757 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1758 PetscErrorCode ierr; 1759 PetscMUMPSInt icntl=0; 1760 PetscInt info[80],i,ninfo=80; 1761 PetscBool flg=PETSC_FALSE; 1762 1763 PetscFunctionBegin; 1764 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 1765 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr); 1766 if (flg) mumps->id.ICNTL(1) = icntl; 1767 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr); 1768 if (flg) mumps->id.ICNTL(2) = icntl; 1769 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr); 1770 if (flg) mumps->id.ICNTL(3) = icntl; 1771 1772 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr); 1773 if (flg) mumps->id.ICNTL(4) = icntl; 1774 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 1775 1776 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr); 1777 if (flg) mumps->id.ICNTL(6) = icntl; 1778 1779 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 
1=Petsc (sequential only), 3=Scotch, 4=PORD, 5=Metis, 7=auto(default)","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr); 1780 if (flg) { 1781 if (icntl== 1 && mumps->petsc_size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not supported by the PETSc/MUMPS interface for parallel matrices\n"); 1782 else mumps->id.ICNTL(7) = icntl; 1783 } 1784 1785 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr); 1786 /* ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL);CHKERRQ(ierr); handled by MatSolveTranspose_MUMPS() */ 1787 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr); 1788 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr); 1789 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr); 1790 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr); 1791 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr); 1792 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr); 1793 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 1794 ierr = MatDestroy(&F->schur);CHKERRQ(ierr); 1795 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 1796 } 1797 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_20","ICNTL(20): the format (dense or sparse) of the right-hand sides","None",mumps->id.ICNTL(20),&mumps->id.ICNTL(20),NULL);CHKERRQ(ierr); -- sparse rhs is not supported in PETSc API */ 1798 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL);CHKERRQ(ierr); we only use distributed solution vector */ 1799 1800 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr); 1801 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr); 1802 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr); 1803 if (mumps->id.ICNTL(24)) { 1804 mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ 1805 } 1806 1807 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr); 1808 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a 
Schur complement matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr); 1809 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_27","ICNTL(27): controls the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr); 1810 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr); 1811 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr); 1812 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */ 1813 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr); 1814 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr); -- not supported by PETSc API */ 1815 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr); 1816 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr); 1817 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);CHKERRQ(ierr); 1818 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);CHKERRQ(ierr); 1819 1820 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr); 1821 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr); 1822 ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr); 1823 ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr); 1824 ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr); 1825 ierr = PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);CHKERRQ(ierr); 1826 1827 ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL);CHKERRQ(ierr); 1828 1829 ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr); 1830 if (ninfo) { 1831 if (ninfo > 80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %d must <= 80\n",ninfo); 1832 ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr); 1833 mumps->ninfo = 
ninfo;
    for (i=0; i<ninfo; i++) {
      if (info[i] < 0 || info[i]>80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %d must be between 1 and 80\n",info[i]);
      else  mumps->info[i] = info[i];
    }
  }

  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps)
{
  PetscErrorCode ierr;
  PetscInt       nthreads=0;

  PetscFunctionBegin;
  mumps->petsc_comm = PetscObjectComm((PetscObject)A);
  ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRQ(ierr); /* so that code like "if (!myid)" still works even if mumps_comm is different */

  ierr = PetscOptionsHasName(NULL,NULL,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr);
  if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when the user did not supply one */
  ierr = PetscOptionsGetInt(NULL,NULL,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr);
  if (mumps->use_petsc_omp_support) {
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
    ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr);
    ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr);
#else
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -mat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in the MATSOLVERMUMPS manual\n");
#endif
  } else {
    mumps->omp_comm      = PETSC_COMM_SELF;
    mumps->mumps_comm    = mumps->petsc_comm;
    mumps->is_omp_master = PETSC_TRUE;
  }
  ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRQ(ierr);
  mumps->reqs = NULL;
  mumps->tag  = 0;

  mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
  mumps->id.job = JOB_INIT;
  mumps->id.par = 1;  /* host participates in factorization and solve */
  mumps->id.sym = mumps->sym;

  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in PetscInitializeMUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1));

  /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
     For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. */
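  /* Note only the control arrays are replicated by the broadcasts below; the result arrays are not:
     when -mat_mumps_use_omp_threads is in effect, PetscMUMPS_c() broadcasts INFOG, RINFOG and the error
     code INFO(1) within omp_comm after every MUMPS call, so non-master ranks see those as well. */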
  ierr = MPI_Bcast(mumps->id.icntl,40,MPI_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 Manual Section 9 */
  ierr = MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr);

  mumps->scat_rhs = NULL;
  mumps->scat_sol = NULL;

  /* set PETSc-MUMPS default options - override MUMPS default */
  mumps->id.ICNTL(3) = 0;
  mumps->id.ICNTL(4) = 0;
  if (mumps->petsc_size == 1) {
    mumps->id.ICNTL(18) = 0;   /* centralized assembled matrix input */
    mumps->id.ICNTL(7)  = 7;   /* automatic choice of ordering done by the package */
  } else {
    mumps->id.ICNTL(18) = 3;   /* distributed assembled matrix input */
    mumps->id.ICNTL(20) = 0;   /* rhs is in dense format */
    mumps->id.ICNTL(21) = 1;   /* distributed solution */
  }

  /* schur */
  mumps->id.size_schur    = 0;
  mumps->id.listvar_schur = NULL;
  mumps->id.schur         = NULL;
  mumps->sizeredrhs       = 0;
  mumps->schur_sol        = NULL;
  mumps->schur_sizesol    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (mumps->id.INFOG(1) < 0) {
    if (A->erroriffailure) {
      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
    } else {
      if (mumps->id.INFOG(1) == -6) {
        ierr = PetscInfo2(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
      } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
        ierr = PetscInfo2(F,"problem with workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_OUTMEMORY;
      } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) { /* INFO(2)=N=0: the matrix is empty, not an error */
        ierr = PetscInfo(F,"Empty matrix\n");CHKERRQ(ierr);
      } else {
        ierr = PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_OTHER;
      }
    }
  }
  PetscFunctionReturn(0);
}

/* Note the Petsc r(=c) permutation is used when mumps->id.ICNTL(7)==1 with centralized assembled matrix input; otherwise r and c are ignored */
PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
  PetscErrorCode ierr;
  Vec            b;
  const PetscInt M = A->rmap->N;

  PetscFunctionBegin;
  mumps->matstruc = DIFFERENT_NONZERO_PATTERN;

  /* Set MUMPS options from the options database */
  ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr);

  ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr);
  ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr);

  /* analysis phase */
  /*----------------*/
  mumps->id.job = JOB_FACTSYMBOLIC;
  mumps->id.n = M;
  switch (mumps->id.ICNTL(18)) {
  case 0:  /* centralized assembled matrix input */
    if (!mumps->myid) {
      mumps->id.nnz = mumps->nnz;
      mumps->id.irn = mumps->irn;
      mumps->id.jcn = mumps->jcn;
      if (mumps->id.ICNTL(6)>1) mumps->id.a = (MumpsScalar*)mumps->val;
      if (mumps->id.ICNTL(7) == 1) { /* use the user-provided matrix ordering - assuming r = c
ordering */ 1967 if (!mumps->myid) { 1968 const PetscInt *idx; 1969 PetscInt i; 1970 1971 ierr = PetscMalloc1(M,&mumps->id.perm_in);CHKERRQ(ierr); 1972 ierr = ISGetIndices(r,&idx);CHKERRQ(ierr); 1973 for (i=0; i<M; i++) {ierr = PetscMUMPSIntCast(idx[i]+1,&(mumps->id.perm_in[i]));CHKERRQ(ierr);} /* perm_in[]: start from 1, not 0! */ 1974 ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr); 1975 } 1976 } 1977 } 1978 break; 1979 case 3: /* distributed assembled matrix input (size>1) */ 1980 mumps->id.nnz_loc = mumps->nnz; 1981 mumps->id.irn_loc = mumps->irn; 1982 mumps->id.jcn_loc = mumps->jcn; 1983 if (mumps->id.ICNTL(6)>1) mumps->id.a_loc = (MumpsScalar*)mumps->val; 1984 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1985 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1986 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1987 ierr = VecDestroy(&b);CHKERRQ(ierr); 1988 break; 1989 } 1990 PetscMUMPS_c(mumps); 1991 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1992 1993 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1994 F->ops->solve = MatSolve_MUMPS; 1995 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1996 F->ops->matsolve = MatMatSolve_MUMPS; 1997 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1998 PetscFunctionReturn(0); 1999 } 2000 2001 /* Note the Petsc r and c permutations are ignored */ 2002 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 2003 { 2004 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 2005 PetscErrorCode ierr; 2006 Vec b; 2007 const PetscInt M = A->rmap->N; 2008 2009 PetscFunctionBegin; 2010 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 2011 2012 /* Set MUMPS options from the options database */ 2013 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 2014 2015 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr); 2016 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 2017 2018 /* analysis phase */ 2019 /*----------------*/ 2020 mumps->id.job = JOB_FACTSYMBOLIC; 2021 mumps->id.n = M; 2022 switch (mumps->id.ICNTL(18)) { 2023 case 0: /* centralized assembled matrix input */ 2024 if (!mumps->myid) { 2025 mumps->id.nnz = mumps->nnz; 2026 mumps->id.irn = mumps->irn; 2027 mumps->id.jcn = mumps->jcn; 2028 if (mumps->id.ICNTL(6)>1) { 2029 mumps->id.a = (MumpsScalar*)mumps->val; 2030 } 2031 } 2032 break; 2033 case 3: /* distributed assembled matrix input (size>1) */ 2034 mumps->id.nnz_loc = mumps->nnz; 2035 mumps->id.irn_loc = mumps->irn; 2036 mumps->id.jcn_loc = mumps->jcn; 2037 if (mumps->id.ICNTL(6)>1) { 2038 mumps->id.a_loc = (MumpsScalar*)mumps->val; 2039 } 2040 /* MUMPS only supports centralized rhs. 
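         The VecScatterCreateToZero() below therefore gathers the distributed right-hand side onto rank 0
         as b_seq, whose array is handed to MUMPS as the centralized id.rhs inside MatSolve_MUMPS().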
Create scatter scat_rhs for repeated use in MatSolve() */ 2041 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 2042 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 2043 ierr = VecDestroy(&b);CHKERRQ(ierr); 2044 break; 2045 } 2046 PetscMUMPS_c(mumps); 2047 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 2048 2049 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2050 F->ops->solve = MatSolve_MUMPS; 2051 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2052 PetscFunctionReturn(0); 2053 } 2054 2055 /* Note the Petsc r permutation and factor info are ignored */ 2056 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info) 2057 { 2058 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 2059 PetscErrorCode ierr; 2060 Vec b; 2061 const PetscInt M = A->rmap->N; 2062 2063 PetscFunctionBegin; 2064 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 2065 2066 /* Set MUMPS options from the options database */ 2067 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 2068 2069 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr); 2070 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 2071 2072 /* analysis phase */ 2073 /*----------------*/ 2074 mumps->id.job = JOB_FACTSYMBOLIC; 2075 mumps->id.n = M; 2076 switch (mumps->id.ICNTL(18)) { 2077 case 0: /* centralized assembled matrix input */ 2078 if (!mumps->myid) { 2079 mumps->id.nnz = mumps->nnz; 2080 mumps->id.irn = mumps->irn; 2081 mumps->id.jcn = mumps->jcn; 2082 if (mumps->id.ICNTL(6)>1) { 2083 mumps->id.a = (MumpsScalar*)mumps->val; 2084 } 2085 } 2086 break; 2087 case 3: /* distributed assembled matrix input (size>1) */ 2088 mumps->id.nnz_loc = mumps->nnz; 2089 mumps->id.irn_loc = mumps->irn; 2090 mumps->id.jcn_loc = mumps->jcn; 2091 if (mumps->id.ICNTL(6)>1) { 2092 mumps->id.a_loc = (MumpsScalar*)mumps->val; 2093 } 2094 /* MUMPS only supports centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2095 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 2096 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 2097 ierr = VecDestroy(&b);CHKERRQ(ierr); 2098 break; 2099 } 2100 PetscMUMPS_c(mumps); 2101 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 2102 2103 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2104 F->ops->solve = MatSolve_MUMPS; 2105 F->ops->solvetranspose = MatSolve_MUMPS; 2106 F->ops->matsolve = MatMatSolve_MUMPS; 2107 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2108 #if defined(PETSC_USE_COMPLEX) 2109 F->ops->getinertia = NULL; 2110 #else 2111 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2112 #endif 2113 PetscFunctionReturn(0); 2114 } 2115 2116 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) 2117 { 2118 PetscErrorCode ierr; 2119 PetscBool iascii; 2120 PetscViewerFormat format; 2121 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 2122 2123 PetscFunctionBegin; 2124 /* check if matrix is mumps type */ 2125 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0); 2126 2127 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 2128 if (iascii) { 2129 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 2130 if (format == PETSC_VIEWER_ASCII_INFO) { 2131 ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr); 2132 ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",mumps->id.sym);CHKERRQ(ierr); 2133 ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",mumps->id.par);CHKERRQ(ierr); 2134 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d \n",mumps->id.ICNTL(1));CHKERRQ(ierr); 2135 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d \n",mumps->id.ICNTL(2));CHKERRQ(ierr); 2136 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d \n",mumps->id.ICNTL(3));CHKERRQ(ierr); 2137 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",mumps->id.ICNTL(4));CHKERRQ(ierr); 2138 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",mumps->id.ICNTL(5));CHKERRQ(ierr); 2139 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",mumps->id.ICNTL(6));CHKERRQ(ierr); 2140 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d \n",mumps->id.ICNTL(7));CHKERRQ(ierr); 2141 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d \n",mumps->id.ICNTL(8));CHKERRQ(ierr); 2142 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",mumps->id.ICNTL(10));CHKERRQ(ierr); 2143 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",mumps->id.ICNTL(11));CHKERRQ(ierr); 2144 if (mumps->id.ICNTL(11)>0) { 2145 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr); 2146 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr); 2147 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6));CHKERRQ(ierr); 2148 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr); 2149 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr); 2150 ierr = PetscViewerASCIIPrintf(viewer," 
RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr); 2151 } 2152 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",mumps->id.ICNTL(12));CHKERRQ(ierr); 2153 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (sequential factorization of the root node): %d \n",mumps->id.ICNTL(13));CHKERRQ(ierr); 2154 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",mumps->id.ICNTL(14));CHKERRQ(ierr); 2155 /* ICNTL(15-17) not used */ 2156 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",mumps->id.ICNTL(18));CHKERRQ(ierr); 2157 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d \n",mumps->id.ICNTL(19));CHKERRQ(ierr); 2158 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(20) (RHS sparse pattern): %d \n",mumps->id.ICNTL(20));CHKERRQ(ierr); 2159 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d \n",mumps->id.ICNTL(21));CHKERRQ(ierr); 2160 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d \n",mumps->id.ICNTL(22));CHKERRQ(ierr); 2161 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory can be allocated locally):%d \n",mumps->id.ICNTL(23));CHKERRQ(ierr); 2162 2163 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d \n",mumps->id.ICNTL(24));CHKERRQ(ierr); 2164 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d \n",mumps->id.ICNTL(25));CHKERRQ(ierr); 2165 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for RHS or solution): %d \n",mumps->id.ICNTL(26));CHKERRQ(ierr); 2166 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(27) (blocking size for multiple RHS): %d \n",mumps->id.ICNTL(27));CHKERRQ(ierr); 2167 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d \n",mumps->id.ICNTL(28));CHKERRQ(ierr); 2168 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d \n",mumps->id.ICNTL(29));CHKERRQ(ierr); 2169 2170 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d \n",mumps->id.ICNTL(30));CHKERRQ(ierr); 2171 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(31) (factors is discarded in the solve phase): %d \n",mumps->id.ICNTL(31));CHKERRQ(ierr); 2172 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d \n",mumps->id.ICNTL(33));CHKERRQ(ierr); 2173 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d \n",mumps->id.ICNTL(35));CHKERRQ(ierr); 2174 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(36) (choice of BLR factorization variant): %d \n",mumps->id.ICNTL(36));CHKERRQ(ierr); 2175 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(38) (estimated compression rate of LU factors): %d \n",mumps->id.ICNTL(38));CHKERRQ(ierr); 2176 2177 ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1));CHKERRQ(ierr); 2178 ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr); 2179 ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3));CHKERRQ(ierr); 2180 ierr = PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4));CHKERRQ(ierr); 2181 ierr = PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5));CHKERRQ(ierr); 2182 ierr = 
PetscViewerASCIIPrintf(viewer," CNTL(7) (dropping parameter for BLR): %g \n",mumps->id.CNTL(7));CHKERRQ(ierr); 2183 2184 /* infomation local to each processor */ 2185 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr); 2186 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 2187 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr); 2188 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2189 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr); 2190 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr); 2191 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2192 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr); 2193 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr); 2194 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2195 2196 ierr = PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr); 2197 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr); 2198 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2199 2200 ierr = PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr); 2201 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr); 2202 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2203 2204 ierr = PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr); 2205 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr); 2206 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2207 2208 if (mumps->ninfo && mumps->ninfo <= 80){ 2209 PetscInt i; 2210 for (i=0; i<mumps->ninfo; i++){ 2211 ierr = PetscViewerASCIIPrintf(viewer, " INFO(%d): \n",mumps->info[i]);CHKERRQ(ierr); 2212 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(mumps->info[i]));CHKERRQ(ierr); 2213 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2214 } 2215 } 2216 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 2217 2218 if (!mumps->myid) { /* information from the host */ 2219 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr); 2220 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr); 2221 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr); 2222 ierr = PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr); 2223 2224 ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(3));CHKERRQ(ierr); 2225 ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for 
factors on all processors after analysis): %d \n",mumps->id.INFOG(4));CHKERRQ(ierr); 2226 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",mumps->id.INFOG(5));CHKERRQ(ierr); 2227 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",mumps->id.INFOG(6));CHKERRQ(ierr); 2228 ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively use after analysis): %d \n",mumps->id.INFOG(7));CHKERRQ(ierr); 2229 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",mumps->id.INFOG(8));CHKERRQ(ierr); 2230 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",mumps->id.INFOG(9));CHKERRQ(ierr); 2231 ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d \n",mumps->id.INFOG(10));CHKERRQ(ierr); 2232 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d \n",mumps->id.INFOG(11));CHKERRQ(ierr); 2233 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",mumps->id.INFOG(12));CHKERRQ(ierr); 2234 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",mumps->id.INFOG(13));CHKERRQ(ierr); 2235 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",mumps->id.INFOG(14));CHKERRQ(ierr); 2236 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",mumps->id.INFOG(15));CHKERRQ(ierr); 2237 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",mumps->id.INFOG(16));CHKERRQ(ierr); 2238 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",mumps->id.INFOG(17));CHKERRQ(ierr); 2239 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",mumps->id.INFOG(18));CHKERRQ(ierr); 2240 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",mumps->id.INFOG(19));CHKERRQ(ierr); 2241 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",mumps->id.INFOG(20));CHKERRQ(ierr); 2242 ierr = PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",mumps->id.INFOG(21));CHKERRQ(ierr); 2243 ierr = PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",mumps->id.INFOG(22));CHKERRQ(ierr); 2244 ierr = PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",mumps->id.INFOG(23));CHKERRQ(ierr); 2245 ierr = PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",mumps->id.INFOG(24));CHKERRQ(ierr); 2246 ierr = PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d 
\n",mumps->id.INFOG(25));CHKERRQ(ierr); 2247 ierr = PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));CHKERRQ(ierr); 2248 ierr = PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));CHKERRQ(ierr); 2249 ierr = PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));CHKERRQ(ierr); 2250 ierr = PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));CHKERRQ(ierr); 2251 ierr = PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));CHKERRQ(ierr); 2252 ierr = PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));CHKERRQ(ierr); 2253 ierr = PetscViewerASCIIPrintf(viewer," INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));CHKERRQ(ierr); 2254 ierr = PetscViewerASCIIPrintf(viewer," INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(36));CHKERRQ(ierr); 2255 ierr = PetscViewerASCIIPrintf(viewer," INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d \n",mumps->id.INFOG(37));CHKERRQ(ierr); 2256 ierr = PetscViewerASCIIPrintf(viewer," INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(38));CHKERRQ(ierr); 2257 ierr = PetscViewerASCIIPrintf(viewer," INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d \n",mumps->id.INFOG(39));CHKERRQ(ierr); 2258 } 2259 } 2260 } 2261 PetscFunctionReturn(0); 2262 } 2263 2264 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info) 2265 { 2266 Mat_MUMPS *mumps =(Mat_MUMPS*)A->data; 2267 2268 PetscFunctionBegin; 2269 info->block_size = 1.0; 2270 info->nz_allocated = mumps->id.INFOG(20); 2271 info->nz_used = mumps->id.INFOG(20); 2272 info->nz_unneeded = 0.0; 2273 info->assemblies = 0.0; 2274 info->mallocs = 0.0; 2275 info->memory = 0.0; 2276 info->fill_ratio_given = 0; 2277 info->fill_ratio_needed = 0; 2278 info->factor_mallocs = 0; 2279 PetscFunctionReturn(0); 2280 } 2281 2282 /* -------------------------------------------------------------------------------------------*/ 2283 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 2284 { 2285 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2286 const PetscScalar *arr; 2287 const PetscInt *idxs; 2288 PetscInt size,i; 2289 PetscErrorCode ierr; 2290 2291 PetscFunctionBegin; 2292 ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr); 2293 if (mumps->petsc_size > 1) { 2294 PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */ 2295 2296 ls = mumps->myid ? (size ? 
PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 2297 ierr = MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);CHKERRQ(ierr); 2298 if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc\n"); 2299 } 2300 2301 /* Schur complement matrix */ 2302 ierr = MatDestroy(&F->schur);CHKERRQ(ierr); 2303 ierr = MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur);CHKERRQ(ierr); 2304 ierr = MatDenseGetArrayRead(F->schur,&arr);CHKERRQ(ierr); 2305 mumps->id.schur = (MumpsScalar*)arr; 2306 mumps->id.size_schur = size; 2307 mumps->id.schur_lld = size; 2308 ierr = MatDenseRestoreArrayRead(F->schur,&arr);CHKERRQ(ierr); 2309 if (mumps->sym == 1) { 2310 ierr = MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 2311 } 2312 2313 /* MUMPS expects Fortran style indices */ 2314 ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr); 2315 ierr = PetscMalloc1(size,&mumps->id.listvar_schur);CHKERRQ(ierr); 2316 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 2317 for (i=0; i<size; i++) {ierr = PetscMUMPSIntCast(idxs[i]+1,&(mumps->id.listvar_schur[i]));CHKERRQ(ierr);} 2318 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 2319 if (mumps->petsc_size > 1) { 2320 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2321 } else { 2322 if (F->factortype == MAT_FACTOR_LU) { 2323 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2324 } else { 2325 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2326 } 2327 } 2328 /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 2329 mumps->id.ICNTL(26) = -1; 2330 PetscFunctionReturn(0); 2331 } 2332 2333 /* -------------------------------------------------------------------------------------------*/ 2334 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S) 2335 { 2336 Mat St; 2337 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2338 PetscScalar *array; 2339 #if defined(PETSC_USE_COMPLEX) 2340 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 2341 #endif 2342 PetscErrorCode ierr; 2343 2344 PetscFunctionBegin; 2345 if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! 
You should call MatFactorSetSchurIS to enable it"); 2346 ierr = MatCreate(PETSC_COMM_SELF,&St);CHKERRQ(ierr); 2347 ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);CHKERRQ(ierr); 2348 ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr); 2349 ierr = MatSetUp(St);CHKERRQ(ierr); 2350 ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr); 2351 if (!mumps->sym) { /* MUMPS always return a full matrix */ 2352 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2353 PetscInt i,j,N=mumps->id.size_schur; 2354 for (i=0;i<N;i++) { 2355 for (j=0;j<N;j++) { 2356 #if !defined(PETSC_USE_COMPLEX) 2357 PetscScalar val = mumps->id.schur[i*N+j]; 2358 #else 2359 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2360 #endif 2361 array[j*N+i] = val; 2362 } 2363 } 2364 } else { /* stored by columns */ 2365 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 2366 } 2367 } else { /* either full or lower-triangular (not packed) */ 2368 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2369 PetscInt i,j,N=mumps->id.size_schur; 2370 for (i=0;i<N;i++) { 2371 for (j=i;j<N;j++) { 2372 #if !defined(PETSC_USE_COMPLEX) 2373 PetscScalar val = mumps->id.schur[i*N+j]; 2374 #else 2375 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2376 #endif 2377 array[i*N+j] = val; 2378 array[j*N+i] = val; 2379 } 2380 } 2381 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2382 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 2383 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2384 PetscInt i,j,N=mumps->id.size_schur; 2385 for (i=0;i<N;i++) { 2386 for (j=0;j<i+1;j++) { 2387 #if !defined(PETSC_USE_COMPLEX) 2388 PetscScalar val = mumps->id.schur[i*N+j]; 2389 #else 2390 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2391 #endif 2392 array[i*N+j] = val; 2393 array[j*N+i] = val; 2394 } 2395 } 2396 } 2397 } 2398 ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr); 2399 *S = St; 2400 PetscFunctionReturn(0); 2401 } 2402 2403 /* -------------------------------------------------------------------------------------------*/ 2404 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival) 2405 { 2406 PetscErrorCode ierr; 2407 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2408 2409 PetscFunctionBegin; 2410 ierr = PetscMUMPSIntCast(ival,&mumps->id.ICNTL(icntl));CHKERRQ(ierr); 2411 PetscFunctionReturn(0); 2412 } 2413 2414 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival) 2415 { 2416 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2417 2418 PetscFunctionBegin; 2419 *ival = mumps->id.ICNTL(icntl); 2420 PetscFunctionReturn(0); 2421 } 2422 2423 /*@ 2424 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2425 2426 Logically Collective on Mat 2427 2428 Input Parameters: 2429 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2430 . icntl - index of MUMPS parameter array ICNTL() 2431 - ival - value of MUMPS ICNTL(icntl) 2432 2433 Options Database: 2434 . -mat_mumps_icntl_<icntl> <ival> 2435 2436 Level: beginner 2437 2438 References: 2439 . 
MUMPS Users' Guide 2440 2441 .seealso: MatGetFactor(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2442 @*/ 2443 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival) 2444 { 2445 PetscErrorCode ierr; 2446 2447 PetscFunctionBegin; 2448 PetscValidType(F,1); 2449 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2450 PetscValidLogicalCollectiveInt(F,icntl,2); 2451 PetscValidLogicalCollectiveInt(F,ival,3); 2452 ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr); 2453 PetscFunctionReturn(0); 2454 } 2455 2456 /*@ 2457 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2458 2459 Logically Collective on Mat 2460 2461 Input Parameters: 2462 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2463 - icntl - index of MUMPS parameter array ICNTL() 2464 2465 Output Parameter: 2466 . ival - value of MUMPS ICNTL(icntl) 2467 2468 Level: beginner 2469 2470 References: 2471 . MUMPS Users' Guide 2472 2473 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2474 @*/ 2475 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival) 2476 { 2477 PetscErrorCode ierr; 2478 2479 PetscFunctionBegin; 2480 PetscValidType(F,1); 2481 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2482 PetscValidLogicalCollectiveInt(F,icntl,2); 2483 PetscValidIntPointer(ival,3); 2484 ierr = PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2485 PetscFunctionReturn(0); 2486 } 2487 2488 /* -------------------------------------------------------------------------------------------*/ 2489 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val) 2490 { 2491 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2492 2493 PetscFunctionBegin; 2494 mumps->id.CNTL(icntl) = val; 2495 PetscFunctionReturn(0); 2496 } 2497 2498 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val) 2499 { 2500 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2501 2502 PetscFunctionBegin; 2503 *val = mumps->id.CNTL(icntl); 2504 PetscFunctionReturn(0); 2505 } 2506 2507 /*@ 2508 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2509 2510 Logically Collective on Mat 2511 2512 Input Parameters: 2513 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2514 . icntl - index of MUMPS parameter array CNTL() 2515 - val - value of MUMPS CNTL(icntl) 2516 2517 Options Database: 2518 . -mat_mumps_cntl_<icntl> <val> 2519 2520 Level: beginner 2521 2522 References: 2523 . 
MUMPS Users' Guide 2524 2525 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2526 @*/ 2527 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val) 2528 { 2529 PetscErrorCode ierr; 2530 2531 PetscFunctionBegin; 2532 PetscValidType(F,1); 2533 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2534 PetscValidLogicalCollectiveInt(F,icntl,2); 2535 PetscValidLogicalCollectiveReal(F,val,3); 2536 ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr); 2537 PetscFunctionReturn(0); 2538 } 2539 2540 /*@ 2541 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2542 2543 Logically Collective on Mat 2544 2545 Input Parameters: 2546 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2547 - icntl - index of MUMPS parameter array CNTL() 2548 2549 Output Parameter: 2550 . val - value of MUMPS CNTL(icntl) 2551 2552 Level: beginner 2553 2554 References: 2555 . MUMPS Users' Guide 2556 2557 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2558 @*/ 2559 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val) 2560 { 2561 PetscErrorCode ierr; 2562 2563 PetscFunctionBegin; 2564 PetscValidType(F,1); 2565 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2566 PetscValidLogicalCollectiveInt(F,icntl,2); 2567 PetscValidRealPointer(val,3); 2568 ierr = PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2569 PetscFunctionReturn(0); 2570 } 2571 2572 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info) 2573 { 2574 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2575 2576 PetscFunctionBegin; 2577 *info = mumps->id.INFO(icntl); 2578 PetscFunctionReturn(0); 2579 } 2580 2581 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog) 2582 { 2583 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2584 2585 PetscFunctionBegin; 2586 *infog = mumps->id.INFOG(icntl); 2587 PetscFunctionReturn(0); 2588 } 2589 2590 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo) 2591 { 2592 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2593 2594 PetscFunctionBegin; 2595 *rinfo = mumps->id.RINFO(icntl); 2596 PetscFunctionReturn(0); 2597 } 2598 2599 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog) 2600 { 2601 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2602 2603 PetscFunctionBegin; 2604 *rinfog = mumps->id.RINFOG(icntl); 2605 PetscFunctionReturn(0); 2606 } 2607 2608 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS) 2609 { 2610 PetscErrorCode ierr; 2611 Mat Bt = NULL,Btseq = NULL; 2612 PetscBool flg; 2613 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2614 PetscScalar *aa; 2615 PetscInt spnr,*ia,*ja,M,nrhs; 2616 2617 PetscFunctionBegin; 2618 PetscValidIntPointer(spRHS,2); 2619 ierr = PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);CHKERRQ(ierr); 2620 if (flg) { 2621 ierr = MatTransposeGetMat(spRHS,&Bt);CHKERRQ(ierr); 2622 } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be type MATTRANSPOSEMAT matrix"); 2623 2624 ierr = MatMumpsSetIcntl(F,30,1);CHKERRQ(ierr); 2625 2626 if (mumps->petsc_size > 1) { 2627 Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data; 2628 Btseq = 
b->A; 2629 } else { 2630 Btseq = Bt; 2631 } 2632 2633 ierr = MatGetSize(spRHS,&M,&nrhs);CHKERRQ(ierr); 2634 mumps->id.nrhs = nrhs; 2635 mumps->id.lrhs = M; 2636 mumps->id.rhs = NULL; 2637 2638 if (!mumps->myid) { 2639 ierr = MatSeqAIJGetArray(Btseq,&aa);CHKERRQ(ierr); 2640 ierr = MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2641 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2642 ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr); 2643 mumps->id.rhs_sparse = (MumpsScalar*)aa; 2644 } else { 2645 mumps->id.irhs_ptr = NULL; 2646 mumps->id.irhs_sparse = NULL; 2647 mumps->id.nz_rhs = 0; 2648 mumps->id.rhs_sparse = NULL; 2649 } 2650 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2651 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2652 2653 /* solve phase */ 2654 /*-------------*/ 2655 mumps->id.job = JOB_SOLVE; 2656 PetscMUMPS_c(mumps); 2657 if (mumps->id.INFOG(1) < 0) 2658 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 2659 2660 if (!mumps->myid) { 2661 ierr = MatSeqAIJRestoreArray(Btseq,&aa);CHKERRQ(ierr); 2662 ierr = MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2663 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2664 } 2665 PetscFunctionReturn(0); 2666 } 2667 2668 /*@ 2669 MatMumpsGetInverse - Get user-specified set of entries in inverse of A 2670 2671 Logically Collective on Mat 2672 2673 Input Parameters: 2674 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2675 - spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0] 2676 2677 Output Parameter: 2678 . spRHS - requested entries of inverse of A 2679 2680 Level: beginner 2681 2682 References: 2683 . MUMPS Users' Guide 2684 2685 .seealso: MatGetFactor(), MatCreateTranspose() 2686 @*/ 2687 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS) 2688 { 2689 PetscErrorCode ierr; 2690 2691 PetscFunctionBegin; 2692 PetscValidType(F,1); 2693 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2694 ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr); 2695 PetscFunctionReturn(0); 2696 } 2697 2698 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST) 2699 { 2700 PetscErrorCode ierr; 2701 Mat spRHS; 2702 2703 PetscFunctionBegin; 2704 ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr); 2705 ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr); 2706 ierr = MatDestroy(&spRHS);CHKERRQ(ierr); 2707 PetscFunctionReturn(0); 2708 } 2709 2710 /*@ 2711 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T 2712 2713 Logically Collective on Mat 2714 2715 Input Parameters: 2716 + F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface 2717 - spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0] 2718 2719 Output Parameter: 2720 . spRHST - requested entries of inverse of A^T 2721 2722 Level: beginner 2723 2724 References: 2725 . 
MUMPS Users' Guide 2726 2727 .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse() 2728 @*/ 2729 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST) 2730 { 2731 PetscErrorCode ierr; 2732 PetscBool flg; 2733 2734 PetscFunctionBegin; 2735 PetscValidType(F,1); 2736 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2737 ierr = PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 2738 if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix"); 2739 2740 ierr = PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));CHKERRQ(ierr); 2741 PetscFunctionReturn(0); 2742 } 2743 2744 /*@ 2745 MatMumpsGetInfo - Get MUMPS parameter INFO() 2746 2747 Logically Collective on Mat 2748 2749 Input Parameters: 2750 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2751 - icntl - index of MUMPS parameter array INFO() 2752 2753 Output Parameter: 2754 . ival - value of MUMPS INFO(icntl) 2755 2756 Level: beginner 2757 2758 References: 2759 . MUMPS Users' Guide 2760 2761 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2762 @*/ 2763 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival) 2764 { 2765 PetscErrorCode ierr; 2766 2767 PetscFunctionBegin; 2768 PetscValidType(F,1); 2769 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2770 PetscValidIntPointer(ival,3); 2771 ierr = PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2772 PetscFunctionReturn(0); 2773 } 2774 2775 /*@ 2776 MatMumpsGetInfog - Get MUMPS parameter INFOG() 2777 2778 Logically Collective on Mat 2779 2780 Input Parameters: 2781 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2782 - icntl - index of MUMPS parameter array INFOG() 2783 2784 Output Parameter: 2785 . ival - value of MUMPS INFOG(icntl) 2786 2787 Level: beginner 2788 2789 References: 2790 . MUMPS Users' Guide 2791 2792 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2793 @*/ 2794 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival) 2795 { 2796 PetscErrorCode ierr; 2797 2798 PetscFunctionBegin; 2799 PetscValidType(F,1); 2800 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2801 PetscValidIntPointer(ival,3); 2802 ierr = PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2803 PetscFunctionReturn(0); 2804 } 2805 2806 /*@ 2807 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 2808 2809 Logically Collective on Mat 2810 2811 Input Parameters: 2812 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2813 - icntl - index of MUMPS parameter array RINFO() 2814 2815 Output Parameter: 2816 . val - value of MUMPS RINFO(icntl) 2817 2818 Level: beginner 2819 2820 References: 2821 . 
MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidRealPointer(val,3);
  ierr = PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
  MatMumpsGetRinfog - Get MUMPS parameter RINFOG()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from the PETSc-MUMPS interface
-  icntl - index of MUMPS parameter array RINFOG()

  Output Parameter:
.  val - value of MUMPS RINFOG(icntl)

   Level: beginner

   References:
.   MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo()
@*/
PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidRealPointer(val,3);
  ierr = PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
  MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for
  distributed and sequential matrices via the external package MUMPS.

  Works with MATAIJ and MATSBAIJ matrices

  Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS

  Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and the non-MUMPS part of the code in flat-MPI mode. See details below.

  Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver

  Options Database Keys:
+  -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
.  -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warnings
.  -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host
.  -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4)
.  -mat_mumps_icntl_6 - ICNTL(6): permutes the matrix to a zero-free diagonal and/or scales it (0 to 7)
.  -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7); 1=PETSc (sequential only), 3=Scotch, 4=PORD, 5=Metis
.  -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77)
.  -mat_mumps_icntl_10 - ICNTL(10): maximum number of iterative refinement steps
.  -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view)
.  -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
.  -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
.  -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space
.  -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement
.  -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
.  -mat_mumps_icntl_23 - ICNTL(23): maximum size of the working memory (in MB) that MUMPS can allocate per processor
.  -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
.  -mat_mumps_icntl_25 - ICNTL(25): computes a solution of a deficient matrix and a null space basis
.  -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase when a Schur complement matrix is present
.  -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
.  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering, 1 = ptscotch, 2 = parmetis
.  -mat_mumps_icntl_30 - ICNTL(30): compute a user-specified set of entries in inv(A)
.  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
.  -mat_mumps_icntl_33 - ICNTL(33): compute the determinant
.  -mat_mumps_icntl_35 - ICNTL(35): level of activation of the BLR (Block Low-Rank) feature
.  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of the BLR factorization variant
.  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of the LU factors with BLR
.  -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold
.  -mat_mumps_cntl_2 - CNTL(2): stopping criterion of iterative refinement
.  -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
.  -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
.  -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
.  -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
-  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) were called before calling MUMPS.
                                    If m is not given, the default is the number of cores per CPU package (socket) reported by hwloc, as suggested by the MUMPS manual.

  If run sequentially, the PETSc-provided ordering can be selected with the option -mat_mumps_icntl_7 1

  Level: beginner

  Notes:
    MUMPS Cholesky does not handle (complex) Hermitian matrices (see http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf), so using it on a Hermitian matrix will generate an error.

    When a MUMPS factorization fails inside a KSP solve, for example with KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling
$          KSPGetPC(ksp,&pc);
$          PCFactorGetMatrix(pc,&mat);
$          MatMumpsGetInfo(mat,....);
$          MatMumpsGetInfog(mat,....); etc.
    Alternatively, run with -ksp_error_if_not_converged and the program will stop with the information printed in the error message. A fuller call sequence is sketched in the example after the References below.

   There are two modes to run MUMPS/PETSc with OpenMP:

$     Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
$     threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".

$     -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
$     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"

   To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
   (i.e., the PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc
   (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
   libraries, to really get performance you should use multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
   (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).

   If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain the MPI
   processes on each compute node. Listing the processes in rank-ascending order, we split the processes on a node into consecutive groups of
   size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and the others in the omp_comm
   are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. We free the CPUs assigned to slaves (which might have been
   set by CPU binding policies in job scripts) and make them available to the master, so that the OpenMP threads spawned by MUMPS can run on them.
   On a multi-socket compute node, MPI rank mapping matters. Using the example above, suppose your compute node has two sockets:
   if MPI ranks are interleaved across the two sockets (even ranks on socket 0, odd ranks on socket 1) and bound to cores, then with
   -mat_mumps_use_omp_threads 16 a master rank (and the threads it spawns) will use half of its cores from socket 0 and half from socket 1,
   which clearly hurts locality. If instead MPI ranks are mapped consecutively onto the two sockets, the problem does not occur.
   Therefore, when you use -mat_mumps_use_omp_threads, keep an eye on your MPI rank mapping and CPU binding.
   For example, with the Slurm job scheduler one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
   examine the mapping result.

   PETSc does not control thread binding in MUMPS. To get the best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts,
   for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts, as PETSc
   calls omp_set_num_threads(m) internally before calling MUMPS.

   References:
+   1. - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
-   2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
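
   Example usage (an illustrative sketch, not taken from the PETSc examples; error checking with CHKERRQ() is omitted):
   querying the MUMPS error codes INFOG(1) and INFO(2) through the factored matrix after a KSP solve in which the
   factorization failed. The KSP object ksp and the vectors b and x are assumed to have been set up by the caller
   with -pc_factor_mat_solver_type mumps; infog1 and info2 are local variables chosen for illustration.

$     PC       pc;
$     Mat      mat;
$     PetscInt infog1,info2;
$     KSPSolve(ksp,b,x);                 /* KSPGetConvergedReason() may then report KSP_DIVERGED_PC_FAILED */
$     KSPGetPC(ksp,&pc);
$     PCFactorGetMatrix(pc,&mat);
$     MatMumpsGetInfog(mat,1,&infog1);   /* INFOG(1) < 0 indicates a MUMPS error; see the MUMPS Users' Guide */
$     MatMumpsGetInfo(mat,2,&info2);     /* INFO(2) carries additional information about the error */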
2964 2965 .seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCGetFactor(), PCFactorGetMatrix() 2966 2967 M*/ 2968 2969 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type) 2970 { 2971 PetscFunctionBegin; 2972 *type = MATSOLVERMUMPS; 2973 PetscFunctionReturn(0); 2974 } 2975 2976 /* MatGetFactor for Seq and MPI AIJ matrices */ 2977 static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F) 2978 { 2979 Mat B; 2980 PetscErrorCode ierr; 2981 Mat_MUMPS *mumps; 2982 PetscBool isSeqAIJ; 2983 PetscMPIInt size; 2984 2985 PetscFunctionBegin; 2986 #if defined(PETSC_USE_COMPLEX) 2987 if (A->hermitian && !A->symmetric && ftype == MAT_FACTOR_CHOLESKY) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported"); 2988 #endif 2989 /* Create the factorization matrix */ 2990 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr); 2991 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2992 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2993 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2994 ierr = MatSetUp(B);CHKERRQ(ierr); 2995 2996 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2997 2998 B->ops->view = MatView_MUMPS; 2999 B->ops->getinfo = MatGetInfo_MUMPS; 3000 3001 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 3002 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 3003 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 3004 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 3005 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 3006 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 3007 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 3008 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 3009 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 3010 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 3011 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 3012 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 3013 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 3014 3015 if (ftype == MAT_FACTOR_LU) { 3016 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3017 B->factortype = MAT_FACTOR_LU; 3018 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3019 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 3020 mumps->sym = 0; 3021 } else { 3022 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3023 B->factortype = MAT_FACTOR_CHOLESKY; 3024 if (isSeqAIJ) mumps->ConvertToTriples = 
MatConvertToTriples_seqaij_seqsbaij; 3025 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 3026 #if defined(PETSC_USE_COMPLEX) 3027 mumps->sym = 2; 3028 #else 3029 if (A->spd_set && A->spd) mumps->sym = 1; 3030 else mumps->sym = 2; 3031 #endif 3032 } 3033 3034 /* set solvertype */ 3035 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 3036 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 3037 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 3038 if (size == 1) { 3039 /* MUMPS option can use ordering with "-mat_mumps_icntl_7 1 when sequential so generate the ordering, even if it may not be used */ 3040 B->useordering = PETSC_TRUE; 3041 } 3042 3043 B->ops->destroy = MatDestroy_MUMPS; 3044 B->data = (void*)mumps; 3045 3046 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 3047 3048 *F = B; 3049 PetscFunctionReturn(0); 3050 } 3051 3052 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3053 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F) 3054 { 3055 Mat B; 3056 PetscErrorCode ierr; 3057 Mat_MUMPS *mumps; 3058 PetscBool isSeqSBAIJ; 3059 PetscMPIInt size; 3060 3061 PetscFunctionBegin; 3062 #if defined(PETSC_USE_COMPLEX) 3063 if (A->hermitian && !A->symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported"); 3064 #endif 3065 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3066 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3067 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 3068 ierr = MatSetUp(B);CHKERRQ(ierr); 3069 3070 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 3071 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr); 3072 if (isSeqSBAIJ) { 3073 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3074 } else { 3075 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3076 } 3077 3078 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3079 B->ops->view = MatView_MUMPS; 3080 B->ops->getinfo = MatGetInfo_MUMPS; 3081 3082 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 3083 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 3084 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 3085 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 3086 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 3087 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 3088 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 3089 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 3090 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 3091 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 3092 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 3093 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 3094 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 3095 3096 B->factortype = MAT_FACTOR_CHOLESKY; 3097 #if defined(PETSC_USE_COMPLEX) 3098 mumps->sym = 2; 3099 #else 3100 if (A->spd_set && A->spd) mumps->sym = 1; 3101 else mumps->sym = 2; 3102 #endif 3103 3104 /* set solvertype */ 3105 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 3106 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 3107 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 3108 if (size == 1) { 3109 /* MUMPS option can use ordering with "-mat_mumps_icntl_7 1 when sequential so generate the ordering, even if it may not be used */ 3110 B->useordering = PETSC_TRUE; 3111 } 3112 3113 B->ops->destroy = MatDestroy_MUMPS; 3114 B->data = (void*)mumps; 3115 3116 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 3117 3118 *F = B; 3119 PetscFunctionReturn(0); 3120 } 3121 3122 static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F) 3123 { 3124 Mat B; 3125 PetscErrorCode ierr; 3126 Mat_MUMPS *mumps; 3127 PetscBool isSeqBAIJ; 3128 PetscMPIInt size; 3129 3130 PetscFunctionBegin; 3131 /* Create the factorization matrix */ 3132 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr); 3133 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3134 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3135 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 3136 ierr = MatSetUp(B);CHKERRQ(ierr); 3137 3138 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 3139 if (ftype == MAT_FACTOR_LU) { 3140 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3141 B->factortype = MAT_FACTOR_LU; 3142 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3143 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3144 mumps->sym = 0; 3145 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead\n"); 3146 3147 B->ops->view = MatView_MUMPS; 3148 B->ops->getinfo = MatGetInfo_MUMPS; 3149 3150 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 3151 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 3152 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 3153 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 3154 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 3155 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 3156 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 3157 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 3158 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 3159 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 3160 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 3161 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 3162 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 3163 3164 /* set solvertype */ 3165 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 3166 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 3167 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 3168 if (size == 1) { 3169 /* MUMPS option can use ordering with "-mat_mumps_icntl_7 1 when sequential so generate the ordering, even if it may not be used */ 3170 B->useordering = PETSC_TRUE; 3171 } 3172 3173 B->ops->destroy = MatDestroy_MUMPS; 3174 B->data = (void*)mumps; 3175 3176 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 3177 3178 *F = B; 3179 PetscFunctionReturn(0); 3180 } 3181 3182 /* MatGetFactor for Seq and MPI SELL matrices */ 3183 static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F) 3184 { 3185 Mat B; 3186 PetscErrorCode ierr; 3187 Mat_MUMPS *mumps; 3188 PetscBool isSeqSELL; 3189 PetscMPIInt size; 3190 3191 PetscFunctionBegin; 3192 /* Create the factorization matrix */ 3193 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);CHKERRQ(ierr); 3194 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3195 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3196 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 3197 ierr = MatSetUp(B);CHKERRQ(ierr); 3198 3199 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 3200 3201 B->ops->view = MatView_MUMPS; 3202 B->ops->getinfo = MatGetInfo_MUMPS; 3203 3204 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 3205 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 3206 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 3207 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 3208 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 3209 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 3210 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 3211 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 3212 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 3213 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 3214 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 3215 3216 if (ftype == MAT_FACTOR_LU) { 3217 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3218 B->factortype = MAT_FACTOR_LU; 3219 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 3220 else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 3221 mumps->sym = 0; 3222 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 3223 3224 /* set solvertype */ 3225 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 3226 ierr = 
PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  if (size == 1) {
    /* MUMPS can use the PETSc ordering with "-mat_mumps_icntl_7 1" when run sequentially, so generate the ordering even though it may not be used */
    B->useordering = PETSC_TRUE;
  }

  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}

PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
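
/*
   Example (an illustrative sketch, not part of this source file or of the PETSc test suite): using the
   factorizations registered above directly through MatGetFactor(), instead of going through KSP/PC.
   The assembled matrix A and the vectors b and x are assumed to exist, and error checking with CHKERRQ()
   is omitted. For an AIJ matrix A, MatGetFactor() dispatches to MatGetFactor_aij_mumps() above.

     Mat           F;
     IS            rperm,cperm;
     MatFactorInfo info;

     MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);      // create the MUMPS factor matrix
     MatMumpsSetIcntl(F,7,5);                               // e.g. request the Metis ordering (ICNTL(7)=5) before the symbolic phase
     MatGetOrdering(A,MATORDERINGNATURAL,&rperm,&cperm);    // MUMPS does its own ordering; a natural ordering serves as a placeholder
     MatFactorInfoInitialize(&info);
     MatLUFactorSymbolic(F,A,rperm,cperm,&info);            // analysis phase (JOB_FACTSYMBOLIC)
     MatLUFactorNumeric(F,A,&info);                         // factorization phase (JOB_FACTNUMERIC)
     MatSolve(F,b,x);                                       // solve phase (JOB_SOLVE)
     ISDestroy(&rperm);
     ISDestroy(&cperm);
     MatDestroy(&F);
*/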