1 2 /* 3 Provides an interface to the MUMPS sparse solver 4 */ 5 #include <petscpkg_version.h> 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 8 #include <../src/mat/impls/sell/mpi/mpisell.h> 9 10 EXTERN_C_BEGIN 11 #if defined(PETSC_USE_COMPLEX) 12 #if defined(PETSC_USE_REAL_SINGLE) 13 #include <cmumps_c.h> 14 #else 15 #include <zmumps_c.h> 16 #endif 17 #else 18 #if defined(PETSC_USE_REAL_SINGLE) 19 #include <smumps_c.h> 20 #else 21 #include <dmumps_c.h> 22 #endif 23 #endif 24 EXTERN_C_END 25 #define JOB_INIT -1 26 #define JOB_FACTSYMBOLIC 1 27 #define JOB_FACTNUMERIC 2 28 #define JOB_SOLVE 3 29 #define JOB_END -2 30 31 /* calls to MUMPS */ 32 #if defined(PETSC_USE_COMPLEX) 33 #if defined(PETSC_USE_REAL_SINGLE) 34 #define MUMPS_c cmumps_c 35 #else 36 #define MUMPS_c zmumps_c 37 #endif 38 #else 39 #if defined(PETSC_USE_REAL_SINGLE) 40 #define MUMPS_c smumps_c 41 #else 42 #define MUMPS_c dmumps_c 43 #endif 44 #endif 45 46 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for 47 number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the 48 naming convention in PetscMPIInt, PetscBLASInt etc. 49 */ 50 typedef MUMPS_INT PetscMUMPSInt; 51 52 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0) 53 #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */ 54 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 55 #endif 56 #else 57 #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */ 58 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 59 #endif 60 #endif 61 62 #define MPIU_MUMPSINT MPI_INT 63 #define PETSC_MUMPS_INT_MAX 2147483647 64 #define PETSC_MUMPS_INT_MIN -2147483648 65 66 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers*/ 67 PETSC_STATIC_INLINE PetscErrorCode PetscMUMPSIntCast(PetscInt a,PetscMUMPSInt *b) 68 { 69 PetscFunctionBegin; 70 if (PetscDefined(USE_64BIT_INDICES) && PetscUnlikelyDebug(a > PETSC_MUMPS_INT_MAX || a < PETSC_MUMPS_INT_MIN)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt"); 71 *b = (PetscMUMPSInt)(a); 72 PetscFunctionReturn(0); 73 } 74 75 /* Put these utility routines here since they are only used in this file */ 76 PETSC_STATIC_INLINE PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject,const char opt[],const char text[],const char man[],PetscMUMPSInt currentvalue,PetscMUMPSInt *value,PetscBool *set,PetscMUMPSInt lb,PetscMUMPSInt ub) 77 { 78 PetscErrorCode ierr; 79 PetscInt myval; 80 PetscBool myset; 81 PetscFunctionBegin; 82 /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */ 83 ierr = PetscOptionsInt_Private(PetscOptionsObject,opt,text,man,(PetscInt)currentvalue,&myval,&myset,lb,ub);CHKERRQ(ierr); 84 if (myset) {ierr = PetscMUMPSIntCast(myval,value);CHKERRQ(ierr);} 85 if (set) *set = myset; 86 PetscFunctionReturn(0); 87 } 88 #define PetscOptionsMUMPSInt(a,b,c,d,e,f) PetscOptionsMUMPSInt_Private(PetscOptionsObject,a,b,c,d,e,f,PETSC_MUMPS_INT_MIN,PETSC_MUMPS_INT_MAX) 89 90 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */ 91 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 92 #define PetscMUMPS_c(mumps) \ 93 do { \ 94 if (mumps->use_petsc_omp_support) { \ 95 if (mumps->is_omp_master) { \ 96 ierr = PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl);CHKERRQ(ierr); \ 97 MUMPS_c(&mumps->id); \ 98 ierr = PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl);CHKERRQ(ierr); \ 99 } \ 100 ierr = PetscOmpCtrlBarrier(mumps->omp_ctrl);CHKERRQ(ierr); \ 101 /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \ 102 to processes, so we only Bcast info[1], an error code and leave others (since they do not have \ 103 an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \ 104 omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \ 105 */ \ 106 ierr = MPI_Bcast(mumps->id.infog, 40,MPIU_MUMPSINT, 0,mumps->omp_comm);CHKERRQ(ierr); \ 107 ierr = MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL, 0,mumps->omp_comm);CHKERRQ(ierr); \ 108 ierr = MPI_Bcast(mumps->id.info, 1, MPIU_MUMPSINT, 0,mumps->omp_comm);CHKERRQ(ierr); \ 109 } else { \ 110 MUMPS_c(&mumps->id); \ 111 } \ 112 } while(0) 113 #else 114 #define PetscMUMPS_c(mumps) \ 115 do { MUMPS_c(&mumps->id); } while (0) 116 #endif 117 118 /* declare MumpsScalar */ 119 #if defined(PETSC_USE_COMPLEX) 120 #if defined(PETSC_USE_REAL_SINGLE) 121 #define MumpsScalar mumps_complex 122 #else 123 #define MumpsScalar mumps_double_complex 124 #endif 125 #else 126 #define MumpsScalar PetscScalar 127 #endif 128 129 /* macros s.t. indices match MUMPS documentation */ 130 #define ICNTL(I) icntl[(I)-1] 131 #define CNTL(I) cntl[(I)-1] 132 #define INFOG(I) infog[(I)-1] 133 #define INFO(I) info[(I)-1] 134 #define RINFOG(I) rinfog[(I)-1] 135 #define RINFO(I) rinfo[(I)-1] 136 137 typedef struct Mat_MUMPS Mat_MUMPS; 138 struct Mat_MUMPS { 139 #if defined(PETSC_USE_COMPLEX) 140 #if defined(PETSC_USE_REAL_SINGLE) 141 CMUMPS_STRUC_C id; 142 #else 143 ZMUMPS_STRUC_C id; 144 #endif 145 #else 146 #if defined(PETSC_USE_REAL_SINGLE) 147 SMUMPS_STRUC_C id; 148 #else 149 DMUMPS_STRUC_C id; 150 #endif 151 #endif 152 153 MatStructure matstruc; 154 PetscMPIInt myid,petsc_size; 155 PetscMUMPSInt *irn,*jcn; /* the (i,j,v) triplets passed to mumps. */ 156 PetscScalar *val,*val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */ 157 PetscInt64 nnz; /* number of nonzeros. The type is called selective 64-bit in mumps */ 158 PetscMUMPSInt sym; 159 MPI_Comm mumps_comm; 160 PetscMUMPSInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 161 VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 162 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0) 163 PetscMUMPSInt lrhs_loc,nloc_rhs,*irhs_loc; 164 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 165 PetscInt *rhs_nrow,max_nrhs; 166 PetscMPIInt *rhs_recvcounts,*rhs_disps; 167 PetscScalar *rhs_loc,*rhs_recvbuf; 168 #endif 169 #endif 170 Vec b_seq,x_seq; 171 PetscInt ninfo,*info; /* which INFO to display */ 172 PetscInt sizeredrhs; 173 PetscScalar *schur_sol; 174 PetscInt schur_sizesol; 175 PetscMUMPSInt *ia_alloc,*ja_alloc; /* work arrays used for the CSR struct for sparse rhs */ 176 PetscInt64 cur_ilen,cur_jlen; /* current len of ia_alloc[], ja_alloc[] */ 177 PetscErrorCode (*ConvertToTriples)(Mat,PetscInt,MatReuse,Mat_MUMPS*); 178 179 /* stuff used by petsc/mumps OpenMP support*/ 180 PetscBool use_petsc_omp_support; 181 PetscOmpCtrl omp_ctrl; /* an OpenMP controler that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 182 MPI_Comm petsc_comm,omp_comm; /* petsc_comm is petsc matrix's comm */ 183 PetscInt64 *recvcount; /* a collection of nnz on omp_master */ 184 PetscMPIInt tag,omp_comm_size; 185 PetscBool is_omp_master; /* is this rank the master of omp_comm */ 186 MPI_Request *reqs; 187 }; 188 189 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt. 190 Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices. 191 */ 192 static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps,PetscInt nrow,PetscInt *ia,PetscInt *ja,PetscMUMPSInt **ia_mumps,PetscMUMPSInt **ja_mumps,PetscMUMPSInt *nnz_mumps) 193 { 194 PetscErrorCode ierr; 195 PetscInt nnz=ia[nrow]-1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */ 196 197 PetscFunctionBegin; 198 #if defined(PETSC_USE_64BIT_INDICES) 199 { 200 PetscInt i; 201 if (nrow+1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */ 202 ierr = PetscFree(mumps->ia_alloc);CHKERRQ(ierr); 203 ierr = PetscMalloc1(nrow+1,&mumps->ia_alloc);CHKERRQ(ierr); 204 mumps->cur_ilen = nrow+1; 205 } 206 if (nnz > mumps->cur_jlen) { 207 ierr = PetscFree(mumps->ja_alloc);CHKERRQ(ierr); 208 ierr = PetscMalloc1(nnz,&mumps->ja_alloc);CHKERRQ(ierr); 209 mumps->cur_jlen = nnz; 210 } 211 for (i=0; i<nrow+1; i++) {ierr = PetscMUMPSIntCast(ia[i],&(mumps->ia_alloc[i]));CHKERRQ(ierr);} 212 for (i=0; i<nnz; i++) {ierr = PetscMUMPSIntCast(ja[i],&(mumps->ja_alloc[i]));CHKERRQ(ierr);} 213 *ia_mumps = mumps->ia_alloc; 214 *ja_mumps = mumps->ja_alloc; 215 } 216 #else 217 *ia_mumps = ia; 218 *ja_mumps = ja; 219 #endif 220 ierr = PetscMUMPSIntCast(nnz,nnz_mumps);CHKERRQ(ierr); 221 PetscFunctionReturn(0); 222 } 223 224 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps) 225 { 226 PetscErrorCode ierr; 227 228 PetscFunctionBegin; 229 ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr); 230 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 231 ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr); 232 mumps->id.size_schur = 0; 233 mumps->id.schur_lld = 0; 234 mumps->id.ICNTL(19) = 0; 235 PetscFunctionReturn(0); 236 } 237 238 /* solve with rhs in mumps->id.redrhs and return in the same location */ 239 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 240 { 241 Mat_MUMPS *mumps=(Mat_MUMPS*)F->data; 242 Mat S,B,X; 243 MatFactorSchurStatus schurstatus; 244 PetscInt sizesol; 245 PetscErrorCode ierr; 246 247 PetscFunctionBegin; 248 ierr = MatFactorFactorizeSchurComplement(F);CHKERRQ(ierr); 249 ierr = MatFactorGetSchurComplement(F,&S,&schurstatus);CHKERRQ(ierr); 250 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);CHKERRQ(ierr); 251 ierr = MatSetType(B,((PetscObject)S)->type_name);CHKERRQ(ierr); 252 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 253 ierr = MatBindToCPU(B,S->boundtocpu);CHKERRQ(ierr); 254 #endif 255 switch (schurstatus) { 256 case MAT_FACTOR_SCHUR_FACTORED: 257 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);CHKERRQ(ierr); 258 ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr); 259 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 260 ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr); 261 #endif 262 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 263 ierr = MatMatSolveTranspose(S,B,X);CHKERRQ(ierr); 264 } else { 265 ierr = MatMatSolve(S,B,X);CHKERRQ(ierr); 266 } 267 break; 268 case MAT_FACTOR_SCHUR_INVERTED: 269 sizesol = mumps->id.nrhs*mumps->id.size_schur; 270 if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 271 ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr); 272 ierr = PetscMalloc1(sizesol,&mumps->schur_sol);CHKERRQ(ierr); 273 mumps->schur_sizesol = sizesol; 274 } 275 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);CHKERRQ(ierr); 276 ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr); 277 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 278 ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr); 279 #endif 280 ierr = MatProductCreateWithMat(S,B,NULL,X);CHKERRQ(ierr); 281 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 282 ierr = MatProductSetType(X,MATPRODUCT_AtB);CHKERRQ(ierr); 283 } else { 284 ierr = MatProductSetType(X,MATPRODUCT_AB);CHKERRQ(ierr); 285 } 286 ierr = MatProductSetFromOptions(X);CHKERRQ(ierr); 287 ierr = MatProductSymbolic(X);CHKERRQ(ierr); 288 ierr = MatProductNumeric(X);CHKERRQ(ierr); 289 290 ierr = MatCopy(X,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 291 break; 292 default: 293 SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %D",F->schur_status); 294 break; 295 } 296 ierr = MatFactorRestoreSchurComplement(F,&S,schurstatus);CHKERRQ(ierr); 297 ierr = MatDestroy(&B);CHKERRQ(ierr); 298 ierr = MatDestroy(&X);CHKERRQ(ierr); 299 PetscFunctionReturn(0); 300 } 301 302 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 303 { 304 Mat_MUMPS *mumps=(Mat_MUMPS*)F->data; 305 PetscErrorCode ierr; 306 307 PetscFunctionBegin; 308 if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 309 PetscFunctionReturn(0); 310 } 311 if (!expansion) { /* prepare for the condensation step */ 312 PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur; 313 /* allocate MUMPS internal array to store reduced right-hand sides */ 314 if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 315 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 316 mumps->id.lredrhs = mumps->id.size_schur; 317 ierr = PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);CHKERRQ(ierr); 318 mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs; 319 } 320 mumps->id.ICNTL(26) = 1; /* condensation phase */ 321 } else { /* prepare for the expansion step */ 322 /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */ 323 ierr = MatMumpsSolveSchur_Private(F);CHKERRQ(ierr); 324 mumps->id.ICNTL(26) = 2; /* expansion phase */ 325 PetscMUMPS_c(mumps); 326 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 327 /* restore defaults */ 328 mumps->id.ICNTL(26) = -1; 329 /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 330 if (mumps->id.nrhs > 1) { 331 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 332 mumps->id.lredrhs = 0; 333 mumps->sizeredrhs = 0; 334 } 335 } 336 PetscFunctionReturn(0); 337 } 338 339 /* 340 MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz] 341 342 input: 343 A - matrix in aij,baij or sbaij format 344 shift - 0: C style output triple; 1: Fortran style output triple. 345 reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 346 MAT_REUSE_MATRIX: only the values in v array are updated 347 output: 348 nnz - dim of r, c, and v (number of local nonzero entries of A) 349 r, c, v - row and col index, matrix values (matrix triples) 350 351 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 352 freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 353 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 354 355 */ 356 357 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 358 { 359 const PetscScalar *av; 360 const PetscInt *ai,*aj,*ajj,M=A->rmap->n; 361 PetscInt64 nz,rnz,i,j,k; 362 PetscErrorCode ierr; 363 PetscMUMPSInt *row,*col; 364 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 365 366 PetscFunctionBegin; 367 ierr = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr); 368 mumps->val = (PetscScalar*)av; 369 if (reuse == MAT_INITIAL_MATRIX) { 370 nz = aa->nz; 371 ai = aa->i; 372 aj = aa->j; 373 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 374 for (i=k=0; i<M; i++) { 375 rnz = ai[i+1] - ai[i]; 376 ajj = aj + ai[i]; 377 for (j=0; j<rnz; j++) { 378 ierr = PetscMUMPSIntCast(i+shift,&row[k]);CHKERRQ(ierr); 379 ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[k]);CHKERRQ(ierr); 380 k++; 381 } 382 } 383 mumps->irn = row; 384 mumps->jcn = col; 385 mumps->nnz = nz; 386 } 387 ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr); 388 PetscFunctionReturn(0); 389 } 390 391 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 392 { 393 PetscErrorCode ierr; 394 PetscInt64 nz,i,j,k,r; 395 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 396 PetscMUMPSInt *row,*col; 397 398 PetscFunctionBegin; 399 mumps->val = a->val; 400 if (reuse == MAT_INITIAL_MATRIX) { 401 nz = a->sliidx[a->totalslices]; 402 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 403 for (i=k=0; i<a->totalslices; i++) { 404 for (j=a->sliidx[i],r=0; j<a->sliidx[i+1]; j++,r=((r+1)&0x07)) { 405 ierr = PetscMUMPSIntCast(8*i+r+shift,&row[k++]);CHKERRQ(ierr); 406 } 407 } 408 for (i=0;i<nz;i++) {ierr = PetscMUMPSIntCast(a->colidx[i]+shift,&col[i]);CHKERRQ(ierr);} 409 mumps->irn = row; 410 mumps->jcn = col; 411 mumps->nnz = nz; 412 } 413 PetscFunctionReturn(0); 414 } 415 416 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 417 { 418 Mat_SeqBAIJ *aa=(Mat_SeqBAIJ*)A->data; 419 const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2; 420 PetscInt64 M,nz,idx=0,rnz,i,j,k,m; 421 PetscInt bs; 422 PetscErrorCode ierr; 423 PetscMUMPSInt *row,*col; 424 425 PetscFunctionBegin; 426 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 427 M = A->rmap->N/bs; 428 mumps->val = aa->a; 429 if (reuse == MAT_INITIAL_MATRIX) { 430 ai = aa->i; aj = aa->j; 431 nz = bs2*aa->nz; 432 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 433 for (i=0; i<M; i++) { 434 ajj = aj + ai[i]; 435 rnz = ai[i+1] - ai[i]; 436 for (k=0; k<rnz; k++) { 437 for (j=0; j<bs; j++) { 438 for (m=0; m<bs; m++) { 439 ierr = PetscMUMPSIntCast(i*bs + m + shift,&row[idx]);CHKERRQ(ierr); 440 ierr = PetscMUMPSIntCast(bs*ajj[k] + j + shift,&col[idx]);CHKERRQ(ierr); 441 idx++; 442 } 443 } 444 } 445 } 446 mumps->irn = row; 447 mumps->jcn = col; 448 mumps->nnz = nz; 449 } 450 PetscFunctionReturn(0); 451 } 452 453 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 454 { 455 const PetscInt *ai, *aj,*ajj; 456 PetscInt bs; 457 PetscInt64 nz,rnz,i,j,k,m; 458 PetscErrorCode ierr; 459 PetscMUMPSInt *row,*col; 460 PetscScalar *val; 461 Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)A->data; 462 const PetscInt bs2=aa->bs2,mbs=aa->mbs; 463 #if defined(PETSC_USE_COMPLEX) 464 PetscBool hermitian; 465 #endif 466 467 PetscFunctionBegin; 468 #if defined(PETSC_USE_COMPLEX) 469 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 470 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 471 #endif 472 ai = aa->i; 473 aj = aa->j; 474 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 475 if (reuse == MAT_INITIAL_MATRIX) { 476 nz = aa->nz; 477 ierr = PetscMalloc2(bs2*nz,&row,bs2*nz,&col);CHKERRQ(ierr); 478 if (bs>1) { 479 ierr = PetscMalloc1(bs2*nz,&mumps->val_alloc);CHKERRQ(ierr); 480 mumps->val = mumps->val_alloc; 481 } else { 482 mumps->val = aa->a; 483 } 484 mumps->irn = row; 485 mumps->jcn = col; 486 } else { 487 if (bs == 1) mumps->val = aa->a; 488 row = mumps->irn; 489 col = mumps->jcn; 490 } 491 val = mumps->val; 492 493 nz = 0; 494 if (bs>1) { 495 for (i=0; i<mbs; i++) { 496 rnz = ai[i+1] - ai[i]; 497 ajj = aj + ai[i]; 498 for (j=0; j<rnz; j++) { 499 for (k=0; k<bs; k++) { 500 for (m=0; m<bs; m++) { 501 if (ajj[j]>i || k>=m) { 502 if (reuse == MAT_INITIAL_MATRIX) { 503 ierr = PetscMUMPSIntCast(i*bs + m + shift,&row[nz]);CHKERRQ(ierr); 504 ierr = PetscMUMPSIntCast(ajj[j]*bs + k + shift,&col[nz]);CHKERRQ(ierr); 505 } 506 val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs]; 507 } 508 } 509 } 510 } 511 } 512 } else if (reuse == MAT_INITIAL_MATRIX) { 513 for (i=0; i<mbs; i++) { 514 rnz = ai[i+1] - ai[i]; 515 ajj = aj + ai[i]; 516 for (j=0; j<rnz; j++) { 517 ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr); 518 ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr); 519 nz++; 520 } 521 } 522 if (nz != aa->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %D != %D",nz,aa->nz); 523 } 524 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 525 PetscFunctionReturn(0); 526 } 527 528 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 529 { 530 const PetscInt *ai,*aj,*ajj,*adiag,M=A->rmap->n; 531 PetscInt64 nz,rnz,i,j; 532 const PetscScalar *av,*v1; 533 PetscScalar *val; 534 PetscErrorCode ierr; 535 PetscMUMPSInt *row,*col; 536 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 537 PetscBool missing; 538 #if defined(PETSC_USE_COMPLEX) 539 PetscBool hermitian; 540 #endif 541 542 PetscFunctionBegin; 543 #if defined(PETSC_USE_COMPLEX) 544 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 545 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 546 #endif 547 ierr = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr); 548 ai = aa->i; aj = aa->j; 549 adiag = aa->diag; 550 ierr = MatMissingDiagonal_SeqAIJ(A,&missing,NULL);CHKERRQ(ierr); 551 if (reuse == MAT_INITIAL_MATRIX) { 552 /* count nz in the upper triangular part of A */ 553 nz = 0; 554 if (missing) { 555 for (i=0; i<M; i++) { 556 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 557 for (j=ai[i];j<ai[i+1];j++) { 558 if (aj[j] < i) continue; 559 nz++; 560 } 561 } else { 562 nz += ai[i+1] - adiag[i]; 563 } 564 } 565 } else { 566 for (i=0; i<M; i++) nz += ai[i+1] - adiag[i]; 567 } 568 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 569 ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr); 570 mumps->nnz = nz; 571 mumps->irn = row; 572 mumps->jcn = col; 573 mumps->val = mumps->val_alloc = val; 574 575 nz = 0; 576 if (missing) { 577 for (i=0; i<M; i++) { 578 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 579 for (j=ai[i];j<ai[i+1];j++) { 580 if (aj[j] < i) continue; 581 ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr); 582 ierr = PetscMUMPSIntCast(aj[j]+shift,&col[nz]);CHKERRQ(ierr); 583 val[nz] = av[j]; 584 nz++; 585 } 586 } else { 587 rnz = ai[i+1] - adiag[i]; 588 ajj = aj + adiag[i]; 589 v1 = av + adiag[i]; 590 for (j=0; j<rnz; j++) { 591 ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr); 592 ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr); 593 val[nz++] = v1[j]; 594 } 595 } 596 } 597 } else { 598 for (i=0; i<M; i++) { 599 rnz = ai[i+1] - adiag[i]; 600 ajj = aj + adiag[i]; 601 v1 = av + adiag[i]; 602 for (j=0; j<rnz; j++) { 603 ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr); 604 ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr); 605 val[nz++] = v1[j]; 606 } 607 } 608 } 609 } else { 610 nz = 0; 611 val = mumps->val; 612 if (missing) { 613 for (i=0; i <M; i++) { 614 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 615 for (j=ai[i];j<ai[i+1];j++) { 616 if (aj[j] < i) continue; 617 val[nz++] = av[j]; 618 } 619 } else { 620 rnz = ai[i+1] - adiag[i]; 621 v1 = av + adiag[i]; 622 for (j=0; j<rnz; j++) { 623 val[nz++] = v1[j]; 624 } 625 } 626 } 627 } else { 628 for (i=0; i <M; i++) { 629 rnz = ai[i+1] - adiag[i]; 630 v1 = av + adiag[i]; 631 for (j=0; j<rnz; j++) { 632 val[nz++] = v1[j]; 633 } 634 } 635 } 636 } 637 ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr); 638 PetscFunctionReturn(0); 639 } 640 641 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 642 { 643 PetscErrorCode ierr; 644 const PetscInt *ai,*aj,*bi,*bj,*garray,*ajj,*bjj; 645 PetscInt bs; 646 PetscInt64 rstart,nz,i,j,k,m,jj,irow,countA,countB; 647 PetscMUMPSInt *row,*col; 648 const PetscScalar *av,*bv,*v1,*v2; 649 PetscScalar *val; 650 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data; 651 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)(mat->A)->data; 652 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 653 const PetscInt bs2=aa->bs2,mbs=aa->mbs; 654 #if defined(PETSC_USE_COMPLEX) 655 PetscBool hermitian; 656 #endif 657 658 PetscFunctionBegin; 659 #if defined(PETSC_USE_COMPLEX) 660 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 661 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 662 #endif 663 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 664 rstart = A->rmap->rstart; 665 ai = aa->i; 666 aj = aa->j; 667 bi = bb->i; 668 bj = bb->j; 669 av = aa->a; 670 bv = bb->a; 671 672 garray = mat->garray; 673 674 if (reuse == MAT_INITIAL_MATRIX) { 675 nz = (aa->nz+bb->nz)*bs2; /* just a conservative estimate */ 676 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 677 ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr); 678 /* can not decide the exact mumps->nnz now because of the SBAIJ */ 679 mumps->irn = row; 680 mumps->jcn = col; 681 mumps->val = mumps->val_alloc = val; 682 } else { 683 val = mumps->val; 684 } 685 686 jj = 0; irow = rstart; 687 for (i=0; i<mbs; i++) { 688 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 689 countA = ai[i+1] - ai[i]; 690 countB = bi[i+1] - bi[i]; 691 bjj = bj + bi[i]; 692 v1 = av + ai[i]*bs2; 693 v2 = bv + bi[i]*bs2; 694 695 if (bs>1) { 696 /* A-part */ 697 for (j=0; j<countA; j++) { 698 for (k=0; k<bs; k++) { 699 for (m=0; m<bs; m++) { 700 if (rstart + ajj[j]*bs>irow || k>=m) { 701 if (reuse == MAT_INITIAL_MATRIX) { 702 ierr = PetscMUMPSIntCast(irow + m + shift,&row[jj]);CHKERRQ(ierr); 703 ierr = PetscMUMPSIntCast(rstart + ajj[j]*bs + k + shift,&col[jj]);CHKERRQ(ierr); 704 } 705 val[jj++] = v1[j*bs2 + m + k*bs]; 706 } 707 } 708 } 709 } 710 711 /* B-part */ 712 for (j=0; j < countB; j++) { 713 for (k=0; k<bs; k++) { 714 for (m=0; m<bs; m++) { 715 if (reuse == MAT_INITIAL_MATRIX) { 716 ierr = PetscMUMPSIntCast(irow + m + shift,&row[jj]);CHKERRQ(ierr); 717 ierr = PetscMUMPSIntCast(garray[bjj[j]]*bs + k + shift,&col[jj]);CHKERRQ(ierr); 718 } 719 val[jj++] = v2[j*bs2 + m + k*bs]; 720 } 721 } 722 } 723 } else { 724 /* A-part */ 725 for (j=0; j<countA; j++) { 726 if (reuse == MAT_INITIAL_MATRIX) { 727 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 728 ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr); 729 } 730 val[jj++] = v1[j]; 731 } 732 733 /* B-part */ 734 for (j=0; j < countB; j++) { 735 if (reuse == MAT_INITIAL_MATRIX) { 736 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 737 ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr); 738 } 739 val[jj++] = v2[j]; 740 } 741 } 742 irow+=bs; 743 } 744 mumps->nnz = jj; 745 PetscFunctionReturn(0); 746 } 747 748 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 749 { 750 const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 751 PetscErrorCode ierr; 752 PetscInt64 rstart,nz,i,j,jj,irow,countA,countB; 753 PetscMUMPSInt *row,*col; 754 const PetscScalar *av, *bv,*v1,*v2; 755 PetscScalar *val; 756 Mat Ad,Ao; 757 Mat_SeqAIJ *aa; 758 Mat_SeqAIJ *bb; 759 760 PetscFunctionBegin; 761 ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr); 762 ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr); 763 ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr); 764 765 aa = (Mat_SeqAIJ*)(Ad)->data; 766 bb = (Mat_SeqAIJ*)(Ao)->data; 767 ai = aa->i; 768 aj = aa->j; 769 bi = bb->i; 770 bj = bb->j; 771 772 rstart = A->rmap->rstart; 773 774 if (reuse == MAT_INITIAL_MATRIX) { 775 nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */ 776 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 777 ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr); 778 mumps->nnz = nz; 779 mumps->irn = row; 780 mumps->jcn = col; 781 mumps->val = mumps->val_alloc = val; 782 } else { 783 val = mumps->val; 784 } 785 786 jj = 0; irow = rstart; 787 for (i=0; i<m; i++) { 788 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 789 countA = ai[i+1] - ai[i]; 790 countB = bi[i+1] - bi[i]; 791 bjj = bj + bi[i]; 792 v1 = av + ai[i]; 793 v2 = bv + bi[i]; 794 795 /* A-part */ 796 for (j=0; j<countA; j++) { 797 if (reuse == MAT_INITIAL_MATRIX) { 798 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 799 ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr); 800 } 801 val[jj++] = v1[j]; 802 } 803 804 /* B-part */ 805 for (j=0; j < countB; j++) { 806 if (reuse == MAT_INITIAL_MATRIX) { 807 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 808 ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr); 809 } 810 val[jj++] = v2[j]; 811 } 812 irow++; 813 } 814 ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr); 815 ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr); 816 PetscFunctionReturn(0); 817 } 818 819 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 820 { 821 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)A->data; 822 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ*)(mat->A)->data; 823 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 824 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj; 825 const PetscInt *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart; 826 const PetscInt bs2=mat->bs2; 827 PetscErrorCode ierr; 828 PetscInt bs; 829 PetscInt64 nz,i,j,k,n,jj,irow,countA,countB,idx; 830 PetscMUMPSInt *row,*col; 831 const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2; 832 PetscScalar *val; 833 834 PetscFunctionBegin; 835 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 836 if (reuse == MAT_INITIAL_MATRIX) { 837 nz = bs2*(aa->nz + bb->nz); 838 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 839 ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr); 840 mumps->nnz = nz; 841 mumps->irn = row; 842 mumps->jcn = col; 843 mumps->val = mumps->val_alloc = val; 844 } else { 845 val = mumps->val; 846 } 847 848 jj = 0; irow = rstart; 849 for (i=0; i<mbs; i++) { 850 countA = ai[i+1] - ai[i]; 851 countB = bi[i+1] - bi[i]; 852 ajj = aj + ai[i]; 853 bjj = bj + bi[i]; 854 v1 = av + bs2*ai[i]; 855 v2 = bv + bs2*bi[i]; 856 857 idx = 0; 858 /* A-part */ 859 for (k=0; k<countA; k++) { 860 for (j=0; j<bs; j++) { 861 for (n=0; n<bs; n++) { 862 if (reuse == MAT_INITIAL_MATRIX) { 863 ierr = PetscMUMPSIntCast(irow + n + shift,&row[jj]);CHKERRQ(ierr); 864 ierr = PetscMUMPSIntCast(rstart + bs*ajj[k] + j + shift,&col[jj]);CHKERRQ(ierr); 865 } 866 val[jj++] = v1[idx++]; 867 } 868 } 869 } 870 871 idx = 0; 872 /* B-part */ 873 for (k=0; k<countB; k++) { 874 for (j=0; j<bs; j++) { 875 for (n=0; n<bs; n++) { 876 if (reuse == MAT_INITIAL_MATRIX) { 877 ierr = PetscMUMPSIntCast(irow + n + shift,&row[jj]);CHKERRQ(ierr); 878 ierr = PetscMUMPSIntCast(bs*garray[bjj[k]] + j + shift,&col[jj]);CHKERRQ(ierr); 879 } 880 val[jj++] = v2[idx++]; 881 } 882 } 883 } 884 irow += bs; 885 } 886 PetscFunctionReturn(0); 887 } 888 889 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 890 { 891 const PetscInt *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 892 PetscErrorCode ierr; 893 PetscInt64 rstart,nz,nza,nzb,i,j,jj,irow,countA,countB; 894 PetscMUMPSInt *row,*col; 895 const PetscScalar *av, *bv,*v1,*v2; 896 PetscScalar *val; 897 Mat Ad,Ao; 898 Mat_SeqAIJ *aa; 899 Mat_SeqAIJ *bb; 900 #if defined(PETSC_USE_COMPLEX) 901 PetscBool hermitian; 902 #endif 903 904 PetscFunctionBegin; 905 #if defined(PETSC_USE_COMPLEX) 906 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 907 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 908 #endif 909 ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr); 910 ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr); 911 ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr); 912 913 aa = (Mat_SeqAIJ*)(Ad)->data; 914 bb = (Mat_SeqAIJ*)(Ao)->data; 915 ai = aa->i; 916 aj = aa->j; 917 adiag = aa->diag; 918 bi = bb->i; 919 bj = bb->j; 920 921 rstart = A->rmap->rstart; 922 923 if (reuse == MAT_INITIAL_MATRIX) { 924 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 925 nzb = 0; /* num of upper triangular entries in mat->B */ 926 for (i=0; i<m; i++) { 927 nza += (ai[i+1] - adiag[i]); 928 countB = bi[i+1] - bi[i]; 929 bjj = bj + bi[i]; 930 for (j=0; j<countB; j++) { 931 if (garray[bjj[j]] > rstart) nzb++; 932 } 933 } 934 935 nz = nza + nzb; /* total nz of upper triangular part of mat */ 936 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 937 ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr); 938 mumps->nnz = nz; 939 mumps->irn = row; 940 mumps->jcn = col; 941 mumps->val = mumps->val_alloc = val; 942 } else { 943 val = mumps->val; 944 } 945 946 jj = 0; irow = rstart; 947 for (i=0; i<m; i++) { 948 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 949 v1 = av + adiag[i]; 950 countA = ai[i+1] - adiag[i]; 951 countB = bi[i+1] - bi[i]; 952 bjj = bj + bi[i]; 953 v2 = bv + bi[i]; 954 955 /* A-part */ 956 for (j=0; j<countA; j++) { 957 if (reuse == MAT_INITIAL_MATRIX) { 958 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 959 ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr); 960 } 961 val[jj++] = v1[j]; 962 } 963 964 /* B-part */ 965 for (j=0; j < countB; j++) { 966 if (garray[bjj[j]] > rstart) { 967 if (reuse == MAT_INITIAL_MATRIX) { 968 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 969 ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr); 970 } 971 val[jj++] = v2[j]; 972 } 973 } 974 irow++; 975 } 976 ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr); 977 ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr); 978 PetscFunctionReturn(0); 979 } 980 981 PetscErrorCode MatDestroy_MUMPS(Mat A) 982 { 983 PetscErrorCode ierr; 984 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 985 986 PetscFunctionBegin; 987 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 988 ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr); 989 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 990 ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr); 991 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 992 ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr); 993 ierr = PetscFree2(mumps->irn,mumps->jcn);CHKERRQ(ierr); 994 ierr = PetscFree(mumps->val_alloc);CHKERRQ(ierr); 995 ierr = PetscFree(mumps->info);CHKERRQ(ierr); 996 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 997 mumps->id.job = JOB_END; 998 PetscMUMPS_c(mumps); 999 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1000 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1001 if (mumps->use_petsc_omp_support) { 1002 ierr = PetscOmpCtrlDestroy(&mumps->omp_ctrl);CHKERRQ(ierr); 1003 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0) 1004 ierr = PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);CHKERRQ(ierr); 1005 ierr = PetscFree3(mumps->rhs_nrow,mumps->rhs_recvcounts,mumps->rhs_disps);CHKERRQ(ierr); 1006 #endif 1007 } 1008 #endif 1009 ierr = PetscFree(mumps->ia_alloc);CHKERRQ(ierr); 1010 ierr = PetscFree(mumps->ja_alloc);CHKERRQ(ierr); 1011 ierr = PetscFree(mumps->recvcount);CHKERRQ(ierr); 1012 ierr = PetscFree(mumps->reqs);CHKERRQ(ierr); 1013 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0) 1014 ierr = PetscFree(mumps->irhs_loc);CHKERRQ(ierr); 1015 #endif 1016 ierr = PetscFree(A->data);CHKERRQ(ierr); 1017 1018 /* clear composed functions */ 1019 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr); 1020 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr); 1021 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr); 1022 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr); 1023 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);CHKERRQ(ierr); 1024 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr); 1025 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);CHKERRQ(ierr); 1026 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);CHKERRQ(ierr); 1027 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);CHKERRQ(ierr); 1028 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);CHKERRQ(ierr); 1029 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);CHKERRQ(ierr); 1030 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);CHKERRQ(ierr); 1031 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);CHKERRQ(ierr); 1032 PetscFunctionReturn(0); 1033 } 1034 1035 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0) 1036 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */ 1037 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A,PetscInt nrhs,const PetscScalar *array) 1038 { 1039 PetscErrorCode ierr; 1040 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1041 const PetscMPIInt ompsize=mumps->omp_comm_size; 1042 PetscInt i,m,M,rstart; 1043 1044 PetscFunctionBegin; 1045 ierr = MatGetSize(A,&M,NULL);CHKERRQ(ierr); 1046 ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr); 1047 if (M > PETSC_MUMPS_INT_MAX) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt"); 1048 if (ompsize == 1) { 1049 if (!mumps->irhs_loc) { 1050 mumps->nloc_rhs = m; 1051 ierr = PetscMalloc1(m,&mumps->irhs_loc);CHKERRQ(ierr); 1052 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 1053 for (i=0; i<m; i++) mumps->irhs_loc[i] = rstart+i+1; /* use 1-based indices */ 1054 } 1055 mumps->id.rhs_loc = (MumpsScalar*)array; 1056 } else { 1057 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1058 const PetscInt *ranges; 1059 PetscMPIInt j,k,sendcount,*petsc_ranks,*omp_ranks; 1060 MPI_Group petsc_group,omp_group; 1061 PetscScalar *recvbuf=NULL; 1062 1063 if (mumps->is_omp_master) { 1064 /* Lazily initialize the omp stuff for distributed rhs */ 1065 if (!mumps->irhs_loc) { 1066 ierr = PetscMalloc2(ompsize,&omp_ranks,ompsize,&petsc_ranks);CHKERRQ(ierr); 1067 ierr = PetscMalloc3(ompsize,&mumps->rhs_nrow,ompsize,&mumps->rhs_recvcounts,ompsize,&mumps->rhs_disps);CHKERRQ(ierr); 1068 ierr = MPI_Comm_group(mumps->petsc_comm,&petsc_group);CHKERRQ(ierr); 1069 ierr = MPI_Comm_group(mumps->omp_comm,&omp_group);CHKERRQ(ierr); 1070 for (j=0; j<ompsize; j++) omp_ranks[j] = j; 1071 ierr = MPI_Group_translate_ranks(omp_group,ompsize,omp_ranks,petsc_group,petsc_ranks);CHKERRQ(ierr); 1072 1073 /* Populate mumps->irhs_loc[], rhs_nrow[] */ 1074 mumps->nloc_rhs = 0; 1075 ierr = MatGetOwnershipRanges(A,&ranges);CHKERRQ(ierr); 1076 for (j=0; j<ompsize; j++) { 1077 mumps->rhs_nrow[j] = ranges[petsc_ranks[j]+1] - ranges[petsc_ranks[j]]; 1078 mumps->nloc_rhs += mumps->rhs_nrow[j]; 1079 } 1080 ierr = PetscMalloc1(mumps->nloc_rhs,&mumps->irhs_loc);CHKERRQ(ierr); 1081 for (j=k=0; j<ompsize; j++) { 1082 for (i=ranges[petsc_ranks[j]]; i<ranges[petsc_ranks[j]+1]; i++,k++) mumps->irhs_loc[k] = i+1; /* uses 1-based indices */ 1083 } 1084 1085 ierr = PetscFree2(omp_ranks,petsc_ranks);CHKERRQ(ierr); 1086 ierr = MPI_Group_free(&petsc_group);CHKERRQ(ierr); 1087 ierr = MPI_Group_free(&omp_group);CHKERRQ(ierr); 1088 } 1089 1090 /* Realloc buffers when current nrhs is bigger than what we have met */ 1091 if (nrhs > mumps->max_nrhs) { 1092 ierr = PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);CHKERRQ(ierr); 1093 ierr = PetscMalloc2(mumps->nloc_rhs*nrhs,&mumps->rhs_loc,mumps->nloc_rhs*nrhs,&mumps->rhs_recvbuf);CHKERRQ(ierr); 1094 mumps->max_nrhs = nrhs; 1095 } 1096 1097 /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */ 1098 for (j=0; j<ompsize; j++) {ierr = PetscMPIIntCast(mumps->rhs_nrow[j]*nrhs,&mumps->rhs_recvcounts[j]);CHKERRQ(ierr);} 1099 mumps->rhs_disps[0] = 0; 1100 for (j=1; j<ompsize; j++) { 1101 mumps->rhs_disps[j] = mumps->rhs_disps[j-1] + mumps->rhs_recvcounts[j-1]; 1102 if (mumps->rhs_disps[j] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscMPIInt overflow!"); 1103 } 1104 recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */ 1105 } 1106 1107 ierr = PetscMPIIntCast(m*nrhs,&sendcount);CHKERRQ(ierr); 1108 ierr = MPI_Gatherv(array,sendcount,MPIU_SCALAR,recvbuf,mumps->rhs_recvcounts,mumps->rhs_disps,MPIU_SCALAR,0,mumps->omp_comm);CHKERRQ(ierr); 1109 1110 if (mumps->is_omp_master) { 1111 if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */ 1112 PetscScalar *dst,*dstbase = mumps->rhs_loc; 1113 for (j=0; j<ompsize; j++) { 1114 const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j]; 1115 dst = dstbase; 1116 for (i=0; i<nrhs; i++) { 1117 ierr = PetscArraycpy(dst,src,mumps->rhs_nrow[j]);CHKERRQ(ierr); 1118 src += mumps->rhs_nrow[j]; 1119 dst += mumps->nloc_rhs; 1120 } 1121 dstbase += mumps->rhs_nrow[j]; 1122 } 1123 } 1124 mumps->id.rhs_loc = (MumpsScalar*)mumps->rhs_loc; 1125 } 1126 #endif /* PETSC_HAVE_OPENMP_SUPPORT */ 1127 } 1128 mumps->id.nrhs = nrhs; 1129 mumps->id.nloc_rhs = mumps->nloc_rhs; 1130 mumps->id.lrhs_loc = mumps->nloc_rhs; 1131 mumps->id.irhs_loc = mumps->irhs_loc; 1132 PetscFunctionReturn(0); 1133 } 1134 #endif /* PETSC_PKG_MUMPS_VERSION_GE(5,3,0) */ 1135 1136 PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x) 1137 { 1138 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1139 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0) 1140 const PetscScalar *rarray; 1141 #endif 1142 PetscScalar *array; 1143 IS is_iden,is_petsc; 1144 PetscErrorCode ierr; 1145 PetscInt i; 1146 PetscBool second_solve = PETSC_FALSE; 1147 static PetscBool cite1 = PETSC_FALSE,cite2 = PETSC_FALSE; 1148 1149 PetscFunctionBegin; 1150 ierr = PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1);CHKERRQ(ierr); 1151 ierr = PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",&cite2);CHKERRQ(ierr); 1152 1153 if (A->factorerrortype) { 1154 ierr = PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1155 ierr = VecSetInf(x);CHKERRQ(ierr); 1156 PetscFunctionReturn(0); 1157 } 1158 1159 mumps->id.nrhs = 1; 1160 if (mumps->petsc_size > 1) { 1161 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0) 1162 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1163 ierr = VecGetArrayRead(b,&rarray);CHKERRQ(ierr); 1164 ierr = MatMumpsSetUpDistRHSInfo(A,1,rarray);CHKERRQ(ierr); 1165 #else 1166 mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a seqential rhs vector*/ 1167 ierr = VecScatterBegin(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1168 ierr = VecScatterEnd(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1169 if (!mumps->myid) { 1170 ierr = VecGetArray(mumps->b_seq,&array);CHKERRQ(ierr); 1171 mumps->id.rhs = (MumpsScalar*)array; 1172 } 1173 #endif 1174 } else { /* petsc_size == 1 */ 1175 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1176 ierr = VecCopy(b,x);CHKERRQ(ierr); 1177 ierr = VecGetArray(x,&array);CHKERRQ(ierr); 1178 mumps->id.rhs = (MumpsScalar*)array; 1179 } 1180 1181 /* 1182 handle condensation step of Schur complement (if any) 1183 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 1184 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 1185 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 1186 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 1187 */ 1188 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1189 if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n"); 1190 second_solve = PETSC_TRUE; 1191 ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr); 1192 } 1193 /* solve phase */ 1194 /*-------------*/ 1195 mumps->id.job = JOB_SOLVE; 1196 PetscMUMPS_c(mumps); 1197 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1198 1199 /* handle expansion step of Schur complement (if any) */ 1200 if (second_solve) { 1201 ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr); 1202 } 1203 1204 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 1205 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 1206 /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 1207 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 1208 } 1209 if (!mumps->scat_sol) { /* create scatter scat_sol */ 1210 PetscInt *isol2_loc=NULL; 1211 ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */ 1212 ierr = PetscMalloc1(mumps->id.lsol_loc,&isol2_loc);CHKERRQ(ierr); 1213 for (i=0; i<mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i]-1; /* change Fortran style to C style */ 1214 ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,isol2_loc,PETSC_OWN_POINTER,&is_petsc);CHKERRQ(ierr); /* to */ 1215 ierr = VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr); 1216 ierr = ISDestroy(&is_iden);CHKERRQ(ierr); 1217 ierr = ISDestroy(&is_petsc);CHKERRQ(ierr); 1218 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 1219 } 1220 1221 ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1222 ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1223 } 1224 1225 if (mumps->petsc_size > 1) { 1226 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0) 1227 ierr = VecRestoreArrayRead(b,&rarray);CHKERRQ(ierr); 1228 #else 1229 if (!mumps->myid) {ierr = VecRestoreArray(mumps->b_seq,&array);CHKERRQ(ierr);} 1230 #endif 1231 } else {ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);} 1232 1233 ierr = PetscLogFlops(2.0*mumps->id.RINFO(3));CHKERRQ(ierr); 1234 PetscFunctionReturn(0); 1235 } 1236 1237 PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x) 1238 { 1239 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1240 PetscErrorCode ierr; 1241 1242 PetscFunctionBegin; 1243 mumps->id.ICNTL(9) = 0; 1244 ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr); 1245 mumps->id.ICNTL(9) = 1; 1246 PetscFunctionReturn(0); 1247 } 1248 1249 PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X) 1250 { 1251 PetscErrorCode ierr; 1252 Mat Bt = NULL; 1253 PetscBool denseX,denseB,flg,flgT; 1254 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1255 PetscInt i,nrhs,M; 1256 PetscScalar *array; 1257 const PetscScalar *rbray; 1258 PetscInt lsol_loc,nlsol_loc,*idxx,iidx = 0; 1259 PetscMUMPSInt *isol_loc,*isol_loc_save; 1260 PetscScalar *bray,*sol_loc,*sol_loc_save; 1261 IS is_to,is_from; 1262 PetscInt k,proc,j,m,myrstart; 1263 const PetscInt *rstart; 1264 Vec v_mpi,msol_loc; 1265 VecScatter scat_sol; 1266 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0) 1267 Vec b_seq; 1268 VecScatter scat_rhs; 1269 #endif 1270 PetscScalar *aa; 1271 PetscInt spnr,*ia,*ja; 1272 Mat_MPIAIJ *b = NULL; 1273 1274 PetscFunctionBegin; 1275 ierr = PetscObjectTypeCompareAny((PetscObject)X,&denseX,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 1276 if (!denseX) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix"); 1277 1278 ierr = PetscObjectTypeCompareAny((PetscObject)B,&denseB,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 1279 if (denseB) { 1280 if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution"); 1281 mumps->id.ICNTL(20)= 0; /* dense RHS */ 1282 } else { /* sparse B */ 1283 if (X == B) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_IDN,"X and B must be different matrices"); 1284 ierr = PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);CHKERRQ(ierr); 1285 if (flgT) { /* input B is transpose of actural RHS matrix, 1286 because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */ 1287 ierr = MatTransposeGetMat(B,&Bt);CHKERRQ(ierr); 1288 } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix"); 1289 mumps->id.ICNTL(20)= 1; /* sparse RHS */ 1290 } 1291 1292 ierr = MatGetSize(B,&M,&nrhs);CHKERRQ(ierr); 1293 mumps->id.nrhs = nrhs; 1294 mumps->id.lrhs = M; 1295 mumps->id.rhs = NULL; 1296 1297 if (mumps->petsc_size == 1) { 1298 PetscScalar *aa; 1299 PetscInt spnr,*ia,*ja; 1300 PetscBool second_solve = PETSC_FALSE; 1301 1302 ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr); 1303 mumps->id.rhs = (MumpsScalar*)array; 1304 1305 if (denseB) { 1306 /* copy B to X */ 1307 ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr); 1308 ierr = PetscArraycpy(array,rbray,M*nrhs);CHKERRQ(ierr); 1309 ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr); 1310 } else { /* sparse B */ 1311 ierr = MatSeqAIJGetArray(Bt,&aa);CHKERRQ(ierr); 1312 ierr = MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1313 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 1314 ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr); 1315 mumps->id.rhs_sparse = (MumpsScalar*)aa; 1316 } 1317 /* handle condensation step of Schur complement (if any) */ 1318 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1319 second_solve = PETSC_TRUE; 1320 ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr); 1321 } 1322 /* solve phase */ 1323 /*-------------*/ 1324 mumps->id.job = JOB_SOLVE; 1325 PetscMUMPS_c(mumps); 1326 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1327 1328 /* handle expansion step of Schur complement (if any) */ 1329 if (second_solve) { 1330 ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr); 1331 } 1332 if (!denseB) { /* sparse B */ 1333 ierr = MatSeqAIJRestoreArray(Bt,&aa);CHKERRQ(ierr); 1334 ierr = MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1335 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 1336 } 1337 ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr); 1338 PetscFunctionReturn(0); 1339 } 1340 1341 /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/ 1342 if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n"); 1343 1344 /* create msol_loc to hold mumps local solution */ 1345 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1346 sol_loc_save = (PetscScalar*)mumps->id.sol_loc; 1347 1348 lsol_loc = mumps->id.lsol_loc; 1349 nlsol_loc = nrhs*lsol_loc; /* length of sol_loc */ 1350 ierr = PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);CHKERRQ(ierr); 1351 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1352 mumps->id.isol_loc = isol_loc; 1353 1354 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);CHKERRQ(ierr); 1355 1356 if (denseB) { 1357 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0) 1358 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1359 ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr); 1360 ierr = MatMumpsSetUpDistRHSInfo(A,nrhs,rbray);CHKERRQ(ierr); 1361 ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr); 1362 ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr); 1363 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,NULL,&v_mpi);CHKERRQ(ierr); 1364 #else 1365 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1366 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1367 0, re-arrange B into desired order, which is a local operation. 1368 */ 1369 1370 /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 1371 /* wrap dense rhs matrix B into a vector v_mpi */ 1372 ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr); 1373 ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr); 1374 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr); 1375 ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr); 1376 1377 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */ 1378 if (!mumps->myid) { 1379 PetscInt *idx; 1380 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1381 ierr = PetscMalloc1(nrhs*M,&idx);CHKERRQ(ierr); 1382 ierr = MatGetOwnershipRanges(B,&rstart);CHKERRQ(ierr); 1383 k = 0; 1384 for (proc=0; proc<mumps->petsc_size; proc++){ 1385 for (j=0; j<nrhs; j++){ 1386 for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i; 1387 } 1388 } 1389 1390 ierr = VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);CHKERRQ(ierr); 1391 ierr = ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr); 1392 ierr = ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);CHKERRQ(ierr); 1393 } else { 1394 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);CHKERRQ(ierr); 1395 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);CHKERRQ(ierr); 1396 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);CHKERRQ(ierr); 1397 } 1398 ierr = VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);CHKERRQ(ierr); 1399 ierr = VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1400 ierr = ISDestroy(&is_to);CHKERRQ(ierr); 1401 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1402 ierr = VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1403 1404 if (!mumps->myid) { /* define rhs on the host */ 1405 ierr = VecGetArray(b_seq,&bray);CHKERRQ(ierr); 1406 mumps->id.rhs = (MumpsScalar*)bray; 1407 ierr = VecRestoreArray(b_seq,&bray);CHKERRQ(ierr); 1408 } 1409 #endif 1410 } else { /* sparse B */ 1411 b = (Mat_MPIAIJ*)Bt->data; 1412 1413 /* wrap dense X into a vector v_mpi */ 1414 ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr); 1415 ierr = MatDenseGetArray(X,&bray);CHKERRQ(ierr); 1416 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr); 1417 ierr = MatDenseRestoreArray(X,&bray);CHKERRQ(ierr); 1418 1419 if (!mumps->myid) { 1420 ierr = MatSeqAIJGetArray(b->A,&aa);CHKERRQ(ierr); 1421 ierr = MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1422 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 1423 ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr); 1424 mumps->id.rhs_sparse = (MumpsScalar*)aa; 1425 } else { 1426 mumps->id.irhs_ptr = NULL; 1427 mumps->id.irhs_sparse = NULL; 1428 mumps->id.nz_rhs = 0; 1429 mumps->id.rhs_sparse = NULL; 1430 } 1431 } 1432 1433 /* solve phase */ 1434 /*-------------*/ 1435 mumps->id.job = JOB_SOLVE; 1436 PetscMUMPS_c(mumps); 1437 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1438 1439 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1440 ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr); 1441 ierr = VecPlaceArray(v_mpi,array);CHKERRQ(ierr); 1442 1443 /* create scatter scat_sol */ 1444 ierr = MatGetOwnershipRanges(X,&rstart);CHKERRQ(ierr); 1445 /* iidx: index for scatter mumps solution to petsc X */ 1446 1447 ierr = ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);CHKERRQ(ierr); 1448 ierr = PetscMalloc1(nlsol_loc,&idxx);CHKERRQ(ierr); 1449 for (i=0; i<lsol_loc; i++) { 1450 isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1451 1452 for (proc=0; proc<mumps->petsc_size; proc++){ 1453 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) { 1454 myrstart = rstart[proc]; 1455 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1456 iidx = k + myrstart*nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1457 m = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */ 1458 break; 1459 } 1460 } 1461 1462 for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m; 1463 } 1464 ierr = ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr); 1465 ierr = VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);CHKERRQ(ierr); 1466 ierr = VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1467 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1468 ierr = ISDestroy(&is_to);CHKERRQ(ierr); 1469 ierr = VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1470 ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr); 1471 1472 /* free spaces */ 1473 mumps->id.sol_loc = (MumpsScalar*)sol_loc_save; 1474 mumps->id.isol_loc = isol_loc_save; 1475 1476 ierr = PetscFree2(sol_loc,isol_loc);CHKERRQ(ierr); 1477 ierr = PetscFree(idxx);CHKERRQ(ierr); 1478 ierr = VecDestroy(&msol_loc);CHKERRQ(ierr); 1479 ierr = VecDestroy(&v_mpi);CHKERRQ(ierr); 1480 if (!denseB) { 1481 if (!mumps->myid) { 1482 b = (Mat_MPIAIJ*)Bt->data; 1483 ierr = MatSeqAIJRestoreArray(b->A,&aa);CHKERRQ(ierr); 1484 ierr = MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1485 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 1486 } 1487 } else { 1488 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0) 1489 ierr = VecDestroy(&b_seq);CHKERRQ(ierr); 1490 ierr = VecScatterDestroy(&scat_rhs);CHKERRQ(ierr); 1491 #endif 1492 } 1493 ierr = VecScatterDestroy(&scat_sol);CHKERRQ(ierr); 1494 ierr = PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));CHKERRQ(ierr); 1495 PetscFunctionReturn(0); 1496 } 1497 1498 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X) 1499 { 1500 PetscErrorCode ierr; 1501 PetscBool flg; 1502 Mat B; 1503 1504 PetscFunctionBegin; 1505 ierr = PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 1506 if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix"); 1507 1508 /* Create B=Bt^T that uses Bt's data structure */ 1509 ierr = MatCreateTranspose(Bt,&B);CHKERRQ(ierr); 1510 1511 ierr = MatMatSolve_MUMPS(A,B,X);CHKERRQ(ierr); 1512 ierr = MatDestroy(&B);CHKERRQ(ierr); 1513 PetscFunctionReturn(0); 1514 } 1515 1516 #if !defined(PETSC_USE_COMPLEX) 1517 /* 1518 input: 1519 F: numeric factor 1520 output: 1521 nneg: total number of negative pivots 1522 nzero: total number of zero pivots 1523 npos: (global dimension of F) - nneg - nzero 1524 */ 1525 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos) 1526 { 1527 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1528 PetscErrorCode ierr; 1529 PetscMPIInt size; 1530 1531 PetscFunctionBegin; 1532 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRQ(ierr); 1533 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1534 if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",mumps->id.INFOG(13)); 1535 1536 if (nneg) *nneg = mumps->id.INFOG(12); 1537 if (nzero || npos) { 1538 if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1539 if (nzero) *nzero = mumps->id.INFOG(28); 1540 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1541 } 1542 PetscFunctionReturn(0); 1543 } 1544 #endif 1545 1546 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps) 1547 { 1548 PetscErrorCode ierr; 1549 PetscInt i,nreqs; 1550 PetscMUMPSInt *irn,*jcn; 1551 PetscMPIInt count; 1552 PetscInt64 totnnz,remain; 1553 const PetscInt osize=mumps->omp_comm_size; 1554 PetscScalar *val; 1555 1556 PetscFunctionBegin; 1557 if (osize > 1) { 1558 if (reuse == MAT_INITIAL_MATRIX) { 1559 /* master first gathers counts of nonzeros to receive */ 1560 if (mumps->is_omp_master) {ierr = PetscMalloc1(osize,&mumps->recvcount);CHKERRQ(ierr);} 1561 ierr = MPI_Gather(&mumps->nnz,1,MPIU_INT64,mumps->recvcount,1,MPIU_INT64,0/*master*/,mumps->omp_comm);CHKERRQ(ierr); 1562 1563 /* Then each computes number of send/recvs */ 1564 if (mumps->is_omp_master) { 1565 /* Start from 1 since self communication is not done in MPI */ 1566 nreqs = 0; 1567 for (i=1; i<osize; i++) nreqs += (mumps->recvcount[i]+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX; 1568 } else { 1569 nreqs = (mumps->nnz+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX; 1570 } 1571 ierr = PetscMalloc1(nreqs*3,&mumps->reqs);CHKERRQ(ierr); /* Triple the requests since we send irn, jcn and val seperately */ 1572 1573 /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 1574 MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 1575 might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size 1576 is very small, the current approach should have no extra overhead compared to MPI_Gatherv. 1577 */ 1578 nreqs = 0; /* counter for actual send/recvs */ 1579 if (mumps->is_omp_master) { 1580 for (i=0,totnnz=0; i<osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1581 ierr = PetscMalloc2(totnnz,&irn,totnnz,&jcn);CHKERRQ(ierr); 1582 ierr = PetscMalloc1(totnnz,&val);CHKERRQ(ierr); 1583 1584 /* Self communication */ 1585 ierr = PetscArraycpy(irn,mumps->irn,mumps->nnz);CHKERRQ(ierr); 1586 ierr = PetscArraycpy(jcn,mumps->jcn,mumps->nnz);CHKERRQ(ierr); 1587 ierr = PetscArraycpy(val,mumps->val,mumps->nnz);CHKERRQ(ierr); 1588 1589 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1590 ierr = PetscFree2(mumps->irn,mumps->jcn);CHKERRQ(ierr); 1591 ierr = PetscFree(mumps->val_alloc);CHKERRQ(ierr); 1592 mumps->nnz = totnnz; 1593 mumps->irn = irn; 1594 mumps->jcn = jcn; 1595 mumps->val = mumps->val_alloc = val; 1596 1597 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1598 jcn += mumps->recvcount[0]; 1599 val += mumps->recvcount[0]; 1600 1601 /* Remote communication */ 1602 for (i=1; i<osize; i++) { 1603 count = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX); 1604 remain = mumps->recvcount[i] - count; 1605 while (count>0) { 1606 ierr = MPI_Irecv(irn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr); 1607 ierr = MPI_Irecv(jcn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr); 1608 ierr = MPI_Irecv(val,count,MPIU_SCALAR, i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr); 1609 irn += count; 1610 jcn += count; 1611 val += count; 1612 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1613 remain -= count; 1614 } 1615 } 1616 } else { 1617 irn = mumps->irn; 1618 jcn = mumps->jcn; 1619 val = mumps->val; 1620 count = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX); 1621 remain = mumps->nnz - count; 1622 while (count>0) { 1623 ierr = MPI_Isend(irn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr); 1624 ierr = MPI_Isend(jcn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr); 1625 ierr = MPI_Isend(val,count,MPIU_SCALAR, 0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr); 1626 irn += count; 1627 jcn += count; 1628 val += count; 1629 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1630 remain -= count; 1631 } 1632 } 1633 } else { 1634 nreqs = 0; 1635 if (mumps->is_omp_master) { 1636 val = mumps->val + mumps->recvcount[0]; 1637 for (i=1; i<osize; i++) { /* Remote communication only since self data is already in place */ 1638 count = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX); 1639 remain = mumps->recvcount[i] - count; 1640 while (count>0) { 1641 ierr = MPI_Irecv(val,count,MPIU_SCALAR,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr); 1642 val += count; 1643 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1644 remain -= count; 1645 } 1646 } 1647 } else { 1648 val = mumps->val; 1649 count = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX); 1650 remain = mumps->nnz - count; 1651 while (count>0) { 1652 ierr = MPI_Isend(val,count,MPIU_SCALAR,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr); 1653 val += count; 1654 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1655 remain -= count; 1656 } 1657 } 1658 } 1659 ierr = MPI_Waitall(nreqs,mumps->reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 1660 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1661 } 1662 PetscFunctionReturn(0); 1663 } 1664 1665 PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info) 1666 { 1667 Mat_MUMPS *mumps =(Mat_MUMPS*)(F)->data; 1668 PetscErrorCode ierr; 1669 PetscBool isMPIAIJ; 1670 1671 PetscFunctionBegin; 1672 if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) { 1673 if (mumps->id.INFOG(1) == -6) { 1674 ierr = PetscInfo2(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1675 } 1676 ierr = PetscInfo2(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1677 PetscFunctionReturn(0); 1678 } 1679 1680 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps);CHKERRQ(ierr); 1681 ierr = MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);CHKERRQ(ierr); 1682 1683 /* numerical factorization phase */ 1684 /*-------------------------------*/ 1685 mumps->id.job = JOB_FACTNUMERIC; 1686 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1687 if (!mumps->myid) { 1688 mumps->id.a = (MumpsScalar*)mumps->val; 1689 } 1690 } else { 1691 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1692 } 1693 PetscMUMPS_c(mumps); 1694 if (mumps->id.INFOG(1) < 0) { 1695 if (A->erroriffailure) { 1696 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 1697 } else { 1698 if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */ 1699 ierr = PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1700 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1701 } else if (mumps->id.INFOG(1) == -13) { 1702 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1703 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1704 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10) ) { 1705 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray \n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1706 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1707 } else { 1708 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1709 F->factorerrortype = MAT_FACTOR_OTHER; 1710 } 1711 } 1712 } 1713 if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d\n",mumps->id.INFOG(16)); 1714 1715 F->assembled = PETSC_TRUE; 1716 mumps->matstruc = SAME_NONZERO_PATTERN; 1717 if (F->schur) { /* reset Schur status to unfactored */ 1718 #if defined(PETSC_HAVE_CUDA) 1719 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 1720 #endif 1721 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1722 mumps->id.ICNTL(19) = 2; 1723 ierr = MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);CHKERRQ(ierr); 1724 } 1725 ierr = MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);CHKERRQ(ierr); 1726 } 1727 1728 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 1729 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 1730 1731 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 1732 if (mumps->petsc_size > 1) { 1733 PetscInt lsol_loc; 1734 PetscScalar *sol_loc; 1735 1736 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr); 1737 1738 /* distributed solution; Create x_seq=sol_loc for repeated use */ 1739 if (mumps->x_seq) { 1740 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 1741 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 1742 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 1743 } 1744 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1745 ierr = PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);CHKERRQ(ierr); 1746 mumps->id.lsol_loc = lsol_loc; 1747 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1748 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr); 1749 } 1750 ierr = PetscLogFlops(mumps->id.RINFO(2));CHKERRQ(ierr); 1751 PetscFunctionReturn(0); 1752 } 1753 1754 /* Sets MUMPS options from the options database */ 1755 PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A) 1756 { 1757 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1758 PetscErrorCode ierr; 1759 PetscMUMPSInt icntl=0; 1760 PetscInt info[80],i,ninfo=80; 1761 PetscBool flg=PETSC_FALSE; 1762 1763 PetscFunctionBegin; 1764 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 1765 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr); 1766 if (flg) mumps->id.ICNTL(1) = icntl; 1767 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr); 1768 if (flg) mumps->id.ICNTL(2) = icntl; 1769 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr); 1770 if (flg) mumps->id.ICNTL(3) = icntl; 1771 1772 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr); 1773 if (flg) mumps->id.ICNTL(4) = icntl; 1774 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 1775 1776 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr); 1777 if (flg) mumps->id.ICNTL(6) = icntl; 1778 1779 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 1=Petsc (sequential only), 3=Scotch, 4=PORD, 5=Metis","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr); 1780 if (flg) { 1781 if (icntl== 1 && mumps->petsc_size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not supported by the PETSc/MUMPS interface for parallel matrices\n"); 1782 else mumps->id.ICNTL(7) = icntl; 1783 } 1784 1785 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr); 1786 /* ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL);CHKERRQ(ierr); handled by MatSolveTranspose_MUMPS() */ 1787 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr); 1788 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr); 1789 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr); 1790 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr); 1791 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr); 1792 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr); 1793 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 1794 ierr = MatDestroy(&F->schur);CHKERRQ(ierr); 1795 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 1796 } 1797 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_20","ICNTL(20): the format (dense or sparse) of the right-hand sides","None",mumps->id.ICNTL(20),&mumps->id.ICNTL(20),NULL);CHKERRQ(ierr); -- sparse rhs is not supported in PETSc API */ 1798 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL);CHKERRQ(ierr); we only use distributed solution vector */ 1799 1800 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr); 1801 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr); 1802 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr); 1803 if (mumps->id.ICNTL(24)) { 1804 mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ 1805 } 1806 1807 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr); 1808 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr); 1809 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_27","ICNTL(27): controls the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr); 1810 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr); 1811 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr); 1812 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */ 1813 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr); 1814 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr); -- not supported by PETSc API */ 1815 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr); 1816 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr); 1817 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);CHKERRQ(ierr); 1818 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);CHKERRQ(ierr); 1819 1820 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr); 1821 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr); 1822 ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr); 1823 ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr); 1824 ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr); 1825 ierr = PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);CHKERRQ(ierr); 1826 1827 ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL);CHKERRQ(ierr); 1828 1829 ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr); 1830 if (ninfo) { 1831 if (ninfo > 80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %d must <= 80\n",ninfo); 1832 ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr); 1833 mumps->ninfo = ninfo; 1834 for (i=0; i<ninfo; i++) { 1835 if (info[i] < 0 || info[i]>80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %d must between 1 and 80\n",ninfo); 1836 else mumps->info[i] = info[i]; 1837 } 1838 } 1839 1840 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1841 PetscFunctionReturn(0); 1842 } 1843 1844 PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps) 1845 { 1846 PetscErrorCode ierr; 1847 PetscInt nthreads=0; 1848 1849 PetscFunctionBegin; 1850 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1851 ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRQ(ierr); 1852 ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRQ(ierr); /* so that code like "if (!myid)" still works even if mumps_comm is different */ 1853 1854 ierr = PetscOptionsHasName(NULL,NULL,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr); 1855 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1856 ierr = PetscOptionsGetInt(NULL,NULL,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr); 1857 if (mumps->use_petsc_omp_support) { 1858 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1859 ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr); 1860 ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr); 1861 #else 1862 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -mat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual\n"); 1863 #endif 1864 } else { 1865 mumps->omp_comm = PETSC_COMM_SELF; 1866 mumps->mumps_comm = mumps->petsc_comm; 1867 mumps->is_omp_master = PETSC_TRUE; 1868 } 1869 ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRQ(ierr); 1870 mumps->reqs = NULL; 1871 mumps->tag = 0; 1872 1873 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 1874 mumps->id.job = JOB_INIT; 1875 mumps->id.par = 1; /* host participates factorizaton and solve */ 1876 mumps->id.sym = mumps->sym; 1877 1878 PetscMUMPS_c(mumps); 1879 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in PetscInitializeMUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1880 1881 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 1882 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 1883 */ 1884 ierr = MPI_Bcast(mumps->id.icntl,40,MPI_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 Manual Section 9 */ 1885 ierr = MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); 1886 1887 mumps->scat_rhs = NULL; 1888 mumps->scat_sol = NULL; 1889 1890 /* set PETSc-MUMPS default options - override MUMPS default */ 1891 mumps->id.ICNTL(3) = 0; 1892 mumps->id.ICNTL(4) = 0; 1893 if (mumps->petsc_size == 1) { 1894 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 1895 } else { 1896 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 1897 mumps->id.ICNTL(20) = 0; /* rhs is in dense format */ 1898 mumps->id.ICNTL(21) = 1; /* distributed solution */ 1899 } 1900 1901 /* schur */ 1902 mumps->id.size_schur = 0; 1903 mumps->id.listvar_schur = NULL; 1904 mumps->id.schur = NULL; 1905 mumps->sizeredrhs = 0; 1906 mumps->schur_sol = NULL; 1907 mumps->schur_sizesol = 0; 1908 PetscFunctionReturn(0); 1909 } 1910 1911 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps) 1912 { 1913 PetscErrorCode ierr; 1914 1915 PetscFunctionBegin; 1916 if (mumps->id.INFOG(1) < 0) { 1917 if (A->erroriffailure) { 1918 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1919 } else { 1920 if (mumps->id.INFOG(1) == -6) { 1921 ierr = PetscInfo2(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1922 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 1923 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 1924 ierr = PetscInfo2(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1925 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1926 } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) { 1927 ierr = PetscInfo(F,"Empty matrix\n");CHKERRQ(ierr); 1928 } else { 1929 ierr = PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1930 F->factorerrortype = MAT_FACTOR_OTHER; 1931 } 1932 } 1933 } 1934 PetscFunctionReturn(0); 1935 } 1936 1937 /* Note Petsc r(=c) permutation is used when mumps->id.ICNTL(7)==1 with centralized assembled matrix input; otherwise r and c are ignored */ 1938 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1939 { 1940 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1941 PetscErrorCode ierr; 1942 Vec b; 1943 const PetscInt M = A->rmap->N; 1944 1945 PetscFunctionBegin; 1946 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1947 1948 /* Set MUMPS options from the options database */ 1949 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1950 1951 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr); 1952 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1953 1954 /* analysis phase */ 1955 /*----------------*/ 1956 mumps->id.job = JOB_FACTSYMBOLIC; 1957 mumps->id.n = M; 1958 switch (mumps->id.ICNTL(18)) { 1959 case 0: /* centralized assembled matrix input */ 1960 if (!mumps->myid) { 1961 mumps->id.nnz = mumps->nnz; 1962 mumps->id.irn = mumps->irn; 1963 mumps->id.jcn = mumps->jcn; 1964 if (mumps->id.ICNTL(6)>1) mumps->id.a = (MumpsScalar*)mumps->val; 1965 if (mumps->id.ICNTL(7) == 1) { /* use user-provide matrix ordering - assuming r = c ordering */ 1966 if (!mumps->myid) { 1967 const PetscInt *idx; 1968 PetscInt i; 1969 1970 ierr = PetscMalloc1(M,&mumps->id.perm_in);CHKERRQ(ierr); 1971 ierr = ISGetIndices(r,&idx);CHKERRQ(ierr); 1972 for (i=0; i<M; i++) {ierr = PetscMUMPSIntCast(idx[i]+1,&(mumps->id.perm_in[i]));CHKERRQ(ierr);} /* perm_in[]: start from 1, not 0! */ 1973 ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr); 1974 } 1975 } 1976 } 1977 break; 1978 case 3: /* distributed assembled matrix input (size>1) */ 1979 mumps->id.nnz_loc = mumps->nnz; 1980 mumps->id.irn_loc = mumps->irn; 1981 mumps->id.jcn_loc = mumps->jcn; 1982 if (mumps->id.ICNTL(6)>1) mumps->id.a_loc = (MumpsScalar*)mumps->val; 1983 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1984 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1985 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1986 ierr = VecDestroy(&b);CHKERRQ(ierr); 1987 break; 1988 } 1989 PetscMUMPS_c(mumps); 1990 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1991 1992 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1993 F->ops->solve = MatSolve_MUMPS; 1994 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1995 F->ops->matsolve = MatMatSolve_MUMPS; 1996 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1997 PetscFunctionReturn(0); 1998 } 1999 2000 /* Note the Petsc r and c permutations are ignored */ 2001 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 2002 { 2003 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 2004 PetscErrorCode ierr; 2005 Vec b; 2006 const PetscInt M = A->rmap->N; 2007 2008 PetscFunctionBegin; 2009 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 2010 2011 /* Set MUMPS options from the options database */ 2012 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 2013 2014 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr); 2015 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 2016 2017 /* analysis phase */ 2018 /*----------------*/ 2019 mumps->id.job = JOB_FACTSYMBOLIC; 2020 mumps->id.n = M; 2021 switch (mumps->id.ICNTL(18)) { 2022 case 0: /* centralized assembled matrix input */ 2023 if (!mumps->myid) { 2024 mumps->id.nnz = mumps->nnz; 2025 mumps->id.irn = mumps->irn; 2026 mumps->id.jcn = mumps->jcn; 2027 if (mumps->id.ICNTL(6)>1) { 2028 mumps->id.a = (MumpsScalar*)mumps->val; 2029 } 2030 } 2031 break; 2032 case 3: /* distributed assembled matrix input (size>1) */ 2033 mumps->id.nnz_loc = mumps->nnz; 2034 mumps->id.irn_loc = mumps->irn; 2035 mumps->id.jcn_loc = mumps->jcn; 2036 if (mumps->id.ICNTL(6)>1) { 2037 mumps->id.a_loc = (MumpsScalar*)mumps->val; 2038 } 2039 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2040 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 2041 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 2042 ierr = VecDestroy(&b);CHKERRQ(ierr); 2043 break; 2044 } 2045 PetscMUMPS_c(mumps); 2046 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 2047 2048 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2049 F->ops->solve = MatSolve_MUMPS; 2050 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2051 PetscFunctionReturn(0); 2052 } 2053 2054 /* Note the Petsc r permutation and factor info are ignored */ 2055 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info) 2056 { 2057 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 2058 PetscErrorCode ierr; 2059 Vec b; 2060 const PetscInt M = A->rmap->N; 2061 2062 PetscFunctionBegin; 2063 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 2064 2065 /* Set MUMPS options from the options database */ 2066 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 2067 2068 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr); 2069 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 2070 2071 /* analysis phase */ 2072 /*----------------*/ 2073 mumps->id.job = JOB_FACTSYMBOLIC; 2074 mumps->id.n = M; 2075 switch (mumps->id.ICNTL(18)) { 2076 case 0: /* centralized assembled matrix input */ 2077 if (!mumps->myid) { 2078 mumps->id.nnz = mumps->nnz; 2079 mumps->id.irn = mumps->irn; 2080 mumps->id.jcn = mumps->jcn; 2081 if (mumps->id.ICNTL(6)>1) { 2082 mumps->id.a = (MumpsScalar*)mumps->val; 2083 } 2084 } 2085 break; 2086 case 3: /* distributed assembled matrix input (size>1) */ 2087 mumps->id.nnz_loc = mumps->nnz; 2088 mumps->id.irn_loc = mumps->irn; 2089 mumps->id.jcn_loc = mumps->jcn; 2090 if (mumps->id.ICNTL(6)>1) { 2091 mumps->id.a_loc = (MumpsScalar*)mumps->val; 2092 } 2093 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2094 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 2095 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 2096 ierr = VecDestroy(&b);CHKERRQ(ierr); 2097 break; 2098 } 2099 PetscMUMPS_c(mumps); 2100 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 2101 2102 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2103 F->ops->solve = MatSolve_MUMPS; 2104 F->ops->solvetranspose = MatSolve_MUMPS; 2105 F->ops->matsolve = MatMatSolve_MUMPS; 2106 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2107 #if defined(PETSC_USE_COMPLEX) 2108 F->ops->getinertia = NULL; 2109 #else 2110 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2111 #endif 2112 PetscFunctionReturn(0); 2113 } 2114 2115 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) 2116 { 2117 PetscErrorCode ierr; 2118 PetscBool iascii; 2119 PetscViewerFormat format; 2120 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 2121 2122 PetscFunctionBegin; 2123 /* check if matrix is mumps type */ 2124 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0); 2125 2126 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 2127 if (iascii) { 2128 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 2129 if (format == PETSC_VIEWER_ASCII_INFO) { 2130 ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr); 2131 ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",mumps->id.sym);CHKERRQ(ierr); 2132 ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",mumps->id.par);CHKERRQ(ierr); 2133 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d \n",mumps->id.ICNTL(1));CHKERRQ(ierr); 2134 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d \n",mumps->id.ICNTL(2));CHKERRQ(ierr); 2135 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d \n",mumps->id.ICNTL(3));CHKERRQ(ierr); 2136 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",mumps->id.ICNTL(4));CHKERRQ(ierr); 2137 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",mumps->id.ICNTL(5));CHKERRQ(ierr); 2138 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",mumps->id.ICNTL(6));CHKERRQ(ierr); 2139 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d \n",mumps->id.ICNTL(7));CHKERRQ(ierr); 2140 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d \n",mumps->id.ICNTL(8));CHKERRQ(ierr); 2141 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",mumps->id.ICNTL(10));CHKERRQ(ierr); 2142 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",mumps->id.ICNTL(11));CHKERRQ(ierr); 2143 if (mumps->id.ICNTL(11)>0) { 2144 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr); 2145 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr); 2146 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6));CHKERRQ(ierr); 2147 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr); 2148 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr); 2149 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr); 2150 } 2151 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",mumps->id.ICNTL(12));CHKERRQ(ierr); 2152 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (sequential factorization of the root node): %d \n",mumps->id.ICNTL(13));CHKERRQ(ierr); 2153 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",mumps->id.ICNTL(14));CHKERRQ(ierr); 2154 /* ICNTL(15-17) not used */ 2155 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",mumps->id.ICNTL(18));CHKERRQ(ierr); 2156 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d \n",mumps->id.ICNTL(19));CHKERRQ(ierr); 2157 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(20) (RHS sparse pattern): %d \n",mumps->id.ICNTL(20));CHKERRQ(ierr); 2158 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d \n",mumps->id.ICNTL(21));CHKERRQ(ierr); 2159 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d \n",mumps->id.ICNTL(22));CHKERRQ(ierr); 2160 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory can be allocated locally):%d \n",mumps->id.ICNTL(23));CHKERRQ(ierr); 2161 2162 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d \n",mumps->id.ICNTL(24));CHKERRQ(ierr); 2163 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d \n",mumps->id.ICNTL(25));CHKERRQ(ierr); 2164 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for RHS or solution): %d \n",mumps->id.ICNTL(26));CHKERRQ(ierr); 2165 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(27) (blocking size for multiple RHS): %d \n",mumps->id.ICNTL(27));CHKERRQ(ierr); 2166 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d \n",mumps->id.ICNTL(28));CHKERRQ(ierr); 2167 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d \n",mumps->id.ICNTL(29));CHKERRQ(ierr); 2168 2169 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d \n",mumps->id.ICNTL(30));CHKERRQ(ierr); 2170 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(31) (factors is discarded in the solve phase): %d \n",mumps->id.ICNTL(31));CHKERRQ(ierr); 2171 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d \n",mumps->id.ICNTL(33));CHKERRQ(ierr); 2172 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d \n",mumps->id.ICNTL(35));CHKERRQ(ierr); 2173 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(36) (choice of BLR factorization variant): %d \n",mumps->id.ICNTL(36));CHKERRQ(ierr); 2174 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(38) (estimated compression rate of LU factors): %d \n",mumps->id.ICNTL(38));CHKERRQ(ierr); 2175 2176 ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1));CHKERRQ(ierr); 2177 ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr); 2178 ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3));CHKERRQ(ierr); 2179 ierr = PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4));CHKERRQ(ierr); 2180 ierr = PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5));CHKERRQ(ierr); 2181 ierr = PetscViewerASCIIPrintf(viewer," CNTL(7) (dropping parameter for BLR): %g \n",mumps->id.CNTL(7));CHKERRQ(ierr); 2182 2183 /* infomation local to each processor */ 2184 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr); 2185 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 2186 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr); 2187 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2188 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr); 2189 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr); 2190 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2191 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr); 2192 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr); 2193 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2194 2195 ierr = PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr); 2196 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr); 2197 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2198 2199 ierr = PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr); 2200 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr); 2201 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2202 2203 ierr = PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr); 2204 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr); 2205 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2206 2207 if (mumps->ninfo && mumps->ninfo <= 80){ 2208 PetscInt i; 2209 for (i=0; i<mumps->ninfo; i++){ 2210 ierr = PetscViewerASCIIPrintf(viewer, " INFO(%d): \n",mumps->info[i]);CHKERRQ(ierr); 2211 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(mumps->info[i]));CHKERRQ(ierr); 2212 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2213 } 2214 } 2215 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 2216 2217 if (!mumps->myid) { /* information from the host */ 2218 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr); 2219 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr); 2220 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr); 2221 ierr = PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr); 2222 2223 ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(3));CHKERRQ(ierr); 2224 ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(4));CHKERRQ(ierr); 2225 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",mumps->id.INFOG(5));CHKERRQ(ierr); 2226 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",mumps->id.INFOG(6));CHKERRQ(ierr); 2227 ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively use after analysis): %d \n",mumps->id.INFOG(7));CHKERRQ(ierr); 2228 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",mumps->id.INFOG(8));CHKERRQ(ierr); 2229 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",mumps->id.INFOG(9));CHKERRQ(ierr); 2230 ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d \n",mumps->id.INFOG(10));CHKERRQ(ierr); 2231 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d \n",mumps->id.INFOG(11));CHKERRQ(ierr); 2232 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",mumps->id.INFOG(12));CHKERRQ(ierr); 2233 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",mumps->id.INFOG(13));CHKERRQ(ierr); 2234 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",mumps->id.INFOG(14));CHKERRQ(ierr); 2235 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",mumps->id.INFOG(15));CHKERRQ(ierr); 2236 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",mumps->id.INFOG(16));CHKERRQ(ierr); 2237 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",mumps->id.INFOG(17));CHKERRQ(ierr); 2238 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",mumps->id.INFOG(18));CHKERRQ(ierr); 2239 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",mumps->id.INFOG(19));CHKERRQ(ierr); 2240 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",mumps->id.INFOG(20));CHKERRQ(ierr); 2241 ierr = PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",mumps->id.INFOG(21));CHKERRQ(ierr); 2242 ierr = PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",mumps->id.INFOG(22));CHKERRQ(ierr); 2243 ierr = PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",mumps->id.INFOG(23));CHKERRQ(ierr); 2244 ierr = PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",mumps->id.INFOG(24));CHKERRQ(ierr); 2245 ierr = PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",mumps->id.INFOG(25));CHKERRQ(ierr); 2246 ierr = PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));CHKERRQ(ierr); 2247 ierr = PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));CHKERRQ(ierr); 2248 ierr = PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));CHKERRQ(ierr); 2249 ierr = PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));CHKERRQ(ierr); 2250 ierr = PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));CHKERRQ(ierr); 2251 ierr = PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));CHKERRQ(ierr); 2252 ierr = PetscViewerASCIIPrintf(viewer," INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));CHKERRQ(ierr); 2253 ierr = PetscViewerASCIIPrintf(viewer," INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(36));CHKERRQ(ierr); 2254 ierr = PetscViewerASCIIPrintf(viewer," INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d \n",mumps->id.INFOG(37));CHKERRQ(ierr); 2255 ierr = PetscViewerASCIIPrintf(viewer," INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(38));CHKERRQ(ierr); 2256 ierr = PetscViewerASCIIPrintf(viewer," INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d \n",mumps->id.INFOG(39));CHKERRQ(ierr); 2257 } 2258 } 2259 } 2260 PetscFunctionReturn(0); 2261 } 2262 2263 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info) 2264 { 2265 Mat_MUMPS *mumps =(Mat_MUMPS*)A->data; 2266 2267 PetscFunctionBegin; 2268 info->block_size = 1.0; 2269 info->nz_allocated = mumps->id.INFOG(20); 2270 info->nz_used = mumps->id.INFOG(20); 2271 info->nz_unneeded = 0.0; 2272 info->assemblies = 0.0; 2273 info->mallocs = 0.0; 2274 info->memory = 0.0; 2275 info->fill_ratio_given = 0; 2276 info->fill_ratio_needed = 0; 2277 info->factor_mallocs = 0; 2278 PetscFunctionReturn(0); 2279 } 2280 2281 /* -------------------------------------------------------------------------------------------*/ 2282 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 2283 { 2284 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2285 const PetscScalar *arr; 2286 const PetscInt *idxs; 2287 PetscInt size,i; 2288 PetscErrorCode ierr; 2289 2290 PetscFunctionBegin; 2291 ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr); 2292 if (mumps->petsc_size > 1) { 2293 PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */ 2294 2295 ls = mumps->myid ? (size ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 2296 ierr = MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);CHKERRQ(ierr); 2297 if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc\n"); 2298 } 2299 2300 /* Schur complement matrix */ 2301 ierr = MatDestroy(&F->schur);CHKERRQ(ierr); 2302 ierr = MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur);CHKERRQ(ierr); 2303 ierr = MatDenseGetArrayRead(F->schur,&arr);CHKERRQ(ierr); 2304 mumps->id.schur = (MumpsScalar*)arr; 2305 mumps->id.size_schur = size; 2306 mumps->id.schur_lld = size; 2307 ierr = MatDenseRestoreArrayRead(F->schur,&arr);CHKERRQ(ierr); 2308 if (mumps->sym == 1) { 2309 ierr = MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 2310 } 2311 2312 /* MUMPS expects Fortran style indices */ 2313 ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr); 2314 ierr = PetscMalloc1(size,&mumps->id.listvar_schur);CHKERRQ(ierr); 2315 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 2316 for (i=0; i<size; i++) {ierr = PetscMUMPSIntCast(idxs[i]+1,&(mumps->id.listvar_schur[i]));CHKERRQ(ierr);} 2317 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 2318 if (mumps->petsc_size > 1) { 2319 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2320 } else { 2321 if (F->factortype == MAT_FACTOR_LU) { 2322 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2323 } else { 2324 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2325 } 2326 } 2327 /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 2328 mumps->id.ICNTL(26) = -1; 2329 PetscFunctionReturn(0); 2330 } 2331 2332 /* -------------------------------------------------------------------------------------------*/ 2333 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S) 2334 { 2335 Mat St; 2336 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2337 PetscScalar *array; 2338 #if defined(PETSC_USE_COMPLEX) 2339 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 2340 #endif 2341 PetscErrorCode ierr; 2342 2343 PetscFunctionBegin; 2344 if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 2345 ierr = MatCreate(PETSC_COMM_SELF,&St);CHKERRQ(ierr); 2346 ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);CHKERRQ(ierr); 2347 ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr); 2348 ierr = MatSetUp(St);CHKERRQ(ierr); 2349 ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr); 2350 if (!mumps->sym) { /* MUMPS always return a full matrix */ 2351 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2352 PetscInt i,j,N=mumps->id.size_schur; 2353 for (i=0;i<N;i++) { 2354 for (j=0;j<N;j++) { 2355 #if !defined(PETSC_USE_COMPLEX) 2356 PetscScalar val = mumps->id.schur[i*N+j]; 2357 #else 2358 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2359 #endif 2360 array[j*N+i] = val; 2361 } 2362 } 2363 } else { /* stored by columns */ 2364 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 2365 } 2366 } else { /* either full or lower-triangular (not packed) */ 2367 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2368 PetscInt i,j,N=mumps->id.size_schur; 2369 for (i=0;i<N;i++) { 2370 for (j=i;j<N;j++) { 2371 #if !defined(PETSC_USE_COMPLEX) 2372 PetscScalar val = mumps->id.schur[i*N+j]; 2373 #else 2374 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2375 #endif 2376 array[i*N+j] = val; 2377 array[j*N+i] = val; 2378 } 2379 } 2380 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2381 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 2382 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2383 PetscInt i,j,N=mumps->id.size_schur; 2384 for (i=0;i<N;i++) { 2385 for (j=0;j<i+1;j++) { 2386 #if !defined(PETSC_USE_COMPLEX) 2387 PetscScalar val = mumps->id.schur[i*N+j]; 2388 #else 2389 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2390 #endif 2391 array[i*N+j] = val; 2392 array[j*N+i] = val; 2393 } 2394 } 2395 } 2396 } 2397 ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr); 2398 *S = St; 2399 PetscFunctionReturn(0); 2400 } 2401 2402 /* -------------------------------------------------------------------------------------------*/ 2403 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival) 2404 { 2405 PetscErrorCode ierr; 2406 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2407 2408 PetscFunctionBegin; 2409 ierr = PetscMUMPSIntCast(ival,&mumps->id.ICNTL(icntl));CHKERRQ(ierr); 2410 PetscFunctionReturn(0); 2411 } 2412 2413 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival) 2414 { 2415 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2416 2417 PetscFunctionBegin; 2418 *ival = mumps->id.ICNTL(icntl); 2419 PetscFunctionReturn(0); 2420 } 2421 2422 /*@ 2423 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2424 2425 Logically Collective on Mat 2426 2427 Input Parameters: 2428 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2429 . icntl - index of MUMPS parameter array ICNTL() 2430 - ival - value of MUMPS ICNTL(icntl) 2431 2432 Options Database: 2433 . -mat_mumps_icntl_<icntl> <ival> 2434 2435 Level: beginner 2436 2437 References: 2438 . MUMPS Users' Guide 2439 2440 .seealso: MatGetFactor(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2441 @*/ 2442 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival) 2443 { 2444 PetscErrorCode ierr; 2445 2446 PetscFunctionBegin; 2447 PetscValidType(F,1); 2448 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2449 PetscValidLogicalCollectiveInt(F,icntl,2); 2450 PetscValidLogicalCollectiveInt(F,ival,3); 2451 ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr); 2452 PetscFunctionReturn(0); 2453 } 2454 2455 /*@ 2456 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2457 2458 Logically Collective on Mat 2459 2460 Input Parameters: 2461 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2462 - icntl - index of MUMPS parameter array ICNTL() 2463 2464 Output Parameter: 2465 . ival - value of MUMPS ICNTL(icntl) 2466 2467 Level: beginner 2468 2469 References: 2470 . MUMPS Users' Guide 2471 2472 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2473 @*/ 2474 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival) 2475 { 2476 PetscErrorCode ierr; 2477 2478 PetscFunctionBegin; 2479 PetscValidType(F,1); 2480 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2481 PetscValidLogicalCollectiveInt(F,icntl,2); 2482 PetscValidIntPointer(ival,3); 2483 ierr = PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2484 PetscFunctionReturn(0); 2485 } 2486 2487 /* -------------------------------------------------------------------------------------------*/ 2488 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val) 2489 { 2490 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2491 2492 PetscFunctionBegin; 2493 mumps->id.CNTL(icntl) = val; 2494 PetscFunctionReturn(0); 2495 } 2496 2497 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val) 2498 { 2499 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2500 2501 PetscFunctionBegin; 2502 *val = mumps->id.CNTL(icntl); 2503 PetscFunctionReturn(0); 2504 } 2505 2506 /*@ 2507 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2508 2509 Logically Collective on Mat 2510 2511 Input Parameters: 2512 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2513 . icntl - index of MUMPS parameter array CNTL() 2514 - val - value of MUMPS CNTL(icntl) 2515 2516 Options Database: 2517 . -mat_mumps_cntl_<icntl> <val> 2518 2519 Level: beginner 2520 2521 References: 2522 . MUMPS Users' Guide 2523 2524 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2525 @*/ 2526 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val) 2527 { 2528 PetscErrorCode ierr; 2529 2530 PetscFunctionBegin; 2531 PetscValidType(F,1); 2532 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2533 PetscValidLogicalCollectiveInt(F,icntl,2); 2534 PetscValidLogicalCollectiveReal(F,val,3); 2535 ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr); 2536 PetscFunctionReturn(0); 2537 } 2538 2539 /*@ 2540 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2541 2542 Logically Collective on Mat 2543 2544 Input Parameters: 2545 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2546 - icntl - index of MUMPS parameter array CNTL() 2547 2548 Output Parameter: 2549 . val - value of MUMPS CNTL(icntl) 2550 2551 Level: beginner 2552 2553 References: 2554 . MUMPS Users' Guide 2555 2556 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2557 @*/ 2558 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val) 2559 { 2560 PetscErrorCode ierr; 2561 2562 PetscFunctionBegin; 2563 PetscValidType(F,1); 2564 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2565 PetscValidLogicalCollectiveInt(F,icntl,2); 2566 PetscValidRealPointer(val,3); 2567 ierr = PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2568 PetscFunctionReturn(0); 2569 } 2570 2571 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info) 2572 { 2573 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2574 2575 PetscFunctionBegin; 2576 *info = mumps->id.INFO(icntl); 2577 PetscFunctionReturn(0); 2578 } 2579 2580 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog) 2581 { 2582 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2583 2584 PetscFunctionBegin; 2585 *infog = mumps->id.INFOG(icntl); 2586 PetscFunctionReturn(0); 2587 } 2588 2589 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo) 2590 { 2591 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2592 2593 PetscFunctionBegin; 2594 *rinfo = mumps->id.RINFO(icntl); 2595 PetscFunctionReturn(0); 2596 } 2597 2598 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog) 2599 { 2600 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2601 2602 PetscFunctionBegin; 2603 *rinfog = mumps->id.RINFOG(icntl); 2604 PetscFunctionReturn(0); 2605 } 2606 2607 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS) 2608 { 2609 PetscErrorCode ierr; 2610 Mat Bt = NULL,Btseq = NULL; 2611 PetscBool flg; 2612 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2613 PetscScalar *aa; 2614 PetscInt spnr,*ia,*ja,M,nrhs; 2615 2616 PetscFunctionBegin; 2617 PetscValidIntPointer(spRHS,2); 2618 ierr = PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);CHKERRQ(ierr); 2619 if (flg) { 2620 ierr = MatTransposeGetMat(spRHS,&Bt);CHKERRQ(ierr); 2621 } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be type MATTRANSPOSEMAT matrix"); 2622 2623 ierr = MatMumpsSetIcntl(F,30,1);CHKERRQ(ierr); 2624 2625 if (mumps->petsc_size > 1) { 2626 Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data; 2627 Btseq = b->A; 2628 } else { 2629 Btseq = Bt; 2630 } 2631 2632 ierr = MatGetSize(spRHS,&M,&nrhs);CHKERRQ(ierr); 2633 mumps->id.nrhs = nrhs; 2634 mumps->id.lrhs = M; 2635 mumps->id.rhs = NULL; 2636 2637 if (!mumps->myid) { 2638 ierr = MatSeqAIJGetArray(Btseq,&aa);CHKERRQ(ierr); 2639 ierr = MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2640 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2641 ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr); 2642 mumps->id.rhs_sparse = (MumpsScalar*)aa; 2643 } else { 2644 mumps->id.irhs_ptr = NULL; 2645 mumps->id.irhs_sparse = NULL; 2646 mumps->id.nz_rhs = 0; 2647 mumps->id.rhs_sparse = NULL; 2648 } 2649 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2650 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2651 2652 /* solve phase */ 2653 /*-------------*/ 2654 mumps->id.job = JOB_SOLVE; 2655 PetscMUMPS_c(mumps); 2656 if (mumps->id.INFOG(1) < 0) 2657 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 2658 2659 if (!mumps->myid) { 2660 ierr = MatSeqAIJRestoreArray(Btseq,&aa);CHKERRQ(ierr); 2661 ierr = MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2662 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2663 } 2664 PetscFunctionReturn(0); 2665 } 2666 2667 /*@ 2668 MatMumpsGetInverse - Get user-specified set of entries in inverse of A 2669 2670 Logically Collective on Mat 2671 2672 Input Parameters: 2673 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2674 - spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0] 2675 2676 Output Parameter: 2677 . spRHS - requested entries of inverse of A 2678 2679 Level: beginner 2680 2681 References: 2682 . MUMPS Users' Guide 2683 2684 .seealso: MatGetFactor(), MatCreateTranspose() 2685 @*/ 2686 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS) 2687 { 2688 PetscErrorCode ierr; 2689 2690 PetscFunctionBegin; 2691 PetscValidType(F,1); 2692 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2693 ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr); 2694 PetscFunctionReturn(0); 2695 } 2696 2697 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST) 2698 { 2699 PetscErrorCode ierr; 2700 Mat spRHS; 2701 2702 PetscFunctionBegin; 2703 ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr); 2704 ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr); 2705 ierr = MatDestroy(&spRHS);CHKERRQ(ierr); 2706 PetscFunctionReturn(0); 2707 } 2708 2709 /*@ 2710 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T 2711 2712 Logically Collective on Mat 2713 2714 Input Parameters: 2715 + F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface 2716 - spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0] 2717 2718 Output Parameter: 2719 . spRHST - requested entries of inverse of A^T 2720 2721 Level: beginner 2722 2723 References: 2724 . MUMPS Users' Guide 2725 2726 .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse() 2727 @*/ 2728 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST) 2729 { 2730 PetscErrorCode ierr; 2731 PetscBool flg; 2732 2733 PetscFunctionBegin; 2734 PetscValidType(F,1); 2735 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2736 ierr = PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 2737 if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix"); 2738 2739 ierr = PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));CHKERRQ(ierr); 2740 PetscFunctionReturn(0); 2741 } 2742 2743 /*@ 2744 MatMumpsGetInfo - Get MUMPS parameter INFO() 2745 2746 Logically Collective on Mat 2747 2748 Input Parameters: 2749 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2750 - icntl - index of MUMPS parameter array INFO() 2751 2752 Output Parameter: 2753 . ival - value of MUMPS INFO(icntl) 2754 2755 Level: beginner 2756 2757 References: 2758 . MUMPS Users' Guide 2759 2760 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2761 @*/ 2762 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival) 2763 { 2764 PetscErrorCode ierr; 2765 2766 PetscFunctionBegin; 2767 PetscValidType(F,1); 2768 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2769 PetscValidIntPointer(ival,3); 2770 ierr = PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2771 PetscFunctionReturn(0); 2772 } 2773 2774 /*@ 2775 MatMumpsGetInfog - Get MUMPS parameter INFOG() 2776 2777 Logically Collective on Mat 2778 2779 Input Parameters: 2780 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2781 - icntl - index of MUMPS parameter array INFOG() 2782 2783 Output Parameter: 2784 . ival - value of MUMPS INFOG(icntl) 2785 2786 Level: beginner 2787 2788 References: 2789 . MUMPS Users' Guide 2790 2791 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2792 @*/ 2793 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival) 2794 { 2795 PetscErrorCode ierr; 2796 2797 PetscFunctionBegin; 2798 PetscValidType(F,1); 2799 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2800 PetscValidIntPointer(ival,3); 2801 ierr = PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2802 PetscFunctionReturn(0); 2803 } 2804 2805 /*@ 2806 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 2807 2808 Logically Collective on Mat 2809 2810 Input Parameters: 2811 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2812 - icntl - index of MUMPS parameter array RINFO() 2813 2814 Output Parameter: 2815 . val - value of MUMPS RINFO(icntl) 2816 2817 Level: beginner 2818 2819 References: 2820 . MUMPS Users' Guide 2821 2822 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfog() 2823 @*/ 2824 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val) 2825 { 2826 PetscErrorCode ierr; 2827 2828 PetscFunctionBegin; 2829 PetscValidType(F,1); 2830 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2831 PetscValidRealPointer(val,3); 2832 ierr = PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2833 PetscFunctionReturn(0); 2834 } 2835 2836 /*@ 2837 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 2838 2839 Logically Collective on Mat 2840 2841 Input Parameters: 2842 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2843 - icntl - index of MUMPS parameter array RINFOG() 2844 2845 Output Parameter: 2846 . val - value of MUMPS RINFOG(icntl) 2847 2848 Level: beginner 2849 2850 References: 2851 . MUMPS Users' Guide 2852 2853 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo() 2854 @*/ 2855 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val) 2856 { 2857 PetscErrorCode ierr; 2858 2859 PetscFunctionBegin; 2860 PetscValidType(F,1); 2861 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2862 PetscValidRealPointer(val,3); 2863 ierr = PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2864 PetscFunctionReturn(0); 2865 } 2866 2867 /*MC 2868 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 2869 distributed and sequential matrices via the external package MUMPS. 2870 2871 Works with MATAIJ and MATSBAIJ matrices 2872 2873 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 2874 2875 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below. 2876 2877 Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver 2878 2879 Options Database Keys: 2880 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 2881 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 2882 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 2883 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 2884 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 2885 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 1=PETSc (sequential only) 3=Scotch, 4=PORD, 5=Metis 2886 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 2887 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 2888 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 2889 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 2890 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 2891 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 2892 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 2893 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 2894 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can allocate per processor 2895 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 2896 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 2897 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix 2898 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering 2899 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 2900 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 2901 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 2902 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 2903 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 2904 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 2905 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 2906 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 2907 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 2908 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 2909 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 2910 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 2911 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 2912 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 2913 Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 2914 2915 If run sequentially can use the PETSc provided ordering with the option -mat_mumps_icntl_7 1 2916 2917 Level: beginner 2918 2919 Notes: 2920 MUMPS Cholesky does not handle (complex) Hermitian matrices http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf so using it will error if the matrix is Hermitian. 2921 2922 When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling 2923 $ KSPGetPC(ksp,&pc); 2924 $ PCFactorGetMatrix(pc,&mat); 2925 $ MatMumpsGetInfo(mat,....); 2926 $ MatMumpsGetInfog(mat,....); etc. 2927 Or you can run with -ksp_error_if_not_converged and the program will be stopped and the information printed in the error message. 2928 2929 Two modes to run MUMPS/PETSc with OpenMP 2930 2931 $ Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 2932 $ threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". 2933 2934 $ -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example, 2935 $ if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 2936 2937 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 2938 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc 2939 (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 2940 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 2941 (PETSc will automatically try to utilized a threaded BLAS if --with-openmp is provided). 2942 2943 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 2944 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 2945 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 2946 are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set 2947 by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs. 2948 In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets, 2949 if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind 2950 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half 2951 cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 2952 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 2953 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 2954 examine the mapping result. 2955 2956 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts, 2957 for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc 2958 calls omp_set_num_threads(m) internally before calling MUMPS. 2959 2960 References: 2961 + 1. - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 2962 - 2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017. 2963 2964 .seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCGetFactor(), PCFactorGetMatrix() 2965 2966 M*/ 2967 2968 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type) 2969 { 2970 PetscFunctionBegin; 2971 *type = MATSOLVERMUMPS; 2972 PetscFunctionReturn(0); 2973 } 2974 2975 /* MatGetFactor for Seq and MPI AIJ matrices */ 2976 static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F) 2977 { 2978 Mat B; 2979 PetscErrorCode ierr; 2980 Mat_MUMPS *mumps; 2981 PetscBool isSeqAIJ; 2982 PetscMPIInt size; 2983 2984 PetscFunctionBegin; 2985 #if defined(PETSC_USE_COMPLEX) 2986 if (A->hermitian && !A->symmetric && ftype == MAT_FACTOR_CHOLESKY) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported"); 2987 #endif 2988 /* Create the factorization matrix */ 2989 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr); 2990 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2991 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2992 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2993 ierr = MatSetUp(B);CHKERRQ(ierr); 2994 2995 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2996 2997 B->ops->view = MatView_MUMPS; 2998 B->ops->getinfo = MatGetInfo_MUMPS; 2999 3000 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 3001 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 3002 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 3003 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 3004 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 3005 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 3006 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 3007 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 3008 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 3009 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 3010 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 3011 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 3012 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 3013 3014 if (ftype == MAT_FACTOR_LU) { 3015 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3016 B->factortype = MAT_FACTOR_LU; 3017 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3018 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 3019 mumps->sym = 0; 3020 } else { 3021 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3022 B->factortype = MAT_FACTOR_CHOLESKY; 3023 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3024 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 3025 #if defined(PETSC_USE_COMPLEX) 3026 mumps->sym = 2; 3027 #else 3028 if (A->spd_set && A->spd) mumps->sym = 1; 3029 else mumps->sym = 2; 3030 #endif 3031 } 3032 3033 /* set solvertype */ 3034 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 3035 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 3036 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 3037 if (size == 1) { 3038 /* MUMPS option can use ordering with "-mat_mumps_icntl_7 1 when sequential so generate the ordering, even if it may not be used */ 3039 B->useordering = PETSC_TRUE; 3040 } 3041 3042 B->ops->destroy = MatDestroy_MUMPS; 3043 B->data = (void*)mumps; 3044 3045 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 3046 3047 *F = B; 3048 PetscFunctionReturn(0); 3049 } 3050 3051 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3052 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F) 3053 { 3054 Mat B; 3055 PetscErrorCode ierr; 3056 Mat_MUMPS *mumps; 3057 PetscBool isSeqSBAIJ; 3058 PetscMPIInt size; 3059 3060 PetscFunctionBegin; 3061 #if defined(PETSC_USE_COMPLEX) 3062 if (A->hermitian && !A->symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported"); 3063 #endif 3064 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3065 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3066 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 3067 ierr = MatSetUp(B);CHKERRQ(ierr); 3068 3069 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 3070 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr); 3071 if (isSeqSBAIJ) { 3072 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3073 } else { 3074 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3075 } 3076 3077 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3078 B->ops->view = MatView_MUMPS; 3079 B->ops->getinfo = MatGetInfo_MUMPS; 3080 3081 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 3082 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 3083 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 3084 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 3085 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 3086 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 3087 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 3088 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 3089 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 3090 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 3091 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 3092 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 3093 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 3094 3095 B->factortype = MAT_FACTOR_CHOLESKY; 3096 #if defined(PETSC_USE_COMPLEX) 3097 mumps->sym = 2; 3098 #else 3099 if (A->spd_set && A->spd) mumps->sym = 1; 3100 else mumps->sym = 2; 3101 #endif 3102 3103 /* set solvertype */ 3104 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 3105 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 3106 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 3107 if (size == 1) { 3108 /* MUMPS option can use ordering with "-mat_mumps_icntl_7 1 when sequential so generate the ordering, even if it may not be used */ 3109 B->useordering = PETSC_TRUE; 3110 } 3111 3112 B->ops->destroy = MatDestroy_MUMPS; 3113 B->data = (void*)mumps; 3114 3115 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 3116 3117 *F = B; 3118 PetscFunctionReturn(0); 3119 } 3120 3121 static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F) 3122 { 3123 Mat B; 3124 PetscErrorCode ierr; 3125 Mat_MUMPS *mumps; 3126 PetscBool isSeqBAIJ; 3127 PetscMPIInt size; 3128 3129 PetscFunctionBegin; 3130 /* Create the factorization matrix */ 3131 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr); 3132 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3133 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3134 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 3135 ierr = MatSetUp(B);CHKERRQ(ierr); 3136 3137 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 3138 if (ftype == MAT_FACTOR_LU) { 3139 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3140 B->factortype = MAT_FACTOR_LU; 3141 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3142 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3143 mumps->sym = 0; 3144 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead\n"); 3145 3146 B->ops->view = MatView_MUMPS; 3147 B->ops->getinfo = MatGetInfo_MUMPS; 3148 3149 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 3150 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 3151 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 3152 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 3153 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 3154 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 3155 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 3156 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 3157 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 3158 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 3159 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 3160 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 3161 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 3162 3163 /* set solvertype */ 3164 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 3165 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 3166 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 3167 if (size == 1) { 3168 /* MUMPS option can use ordering with "-mat_mumps_icntl_7 1 when sequential so generate the ordering, even if it may not be used */ 3169 B->useordering = PETSC_TRUE; 3170 } 3171 3172 B->ops->destroy = MatDestroy_MUMPS; 3173 B->data = (void*)mumps; 3174 3175 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 3176 3177 *F = B; 3178 PetscFunctionReturn(0); 3179 } 3180 3181 /* MatGetFactor for Seq and MPI SELL matrices */ 3182 static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F) 3183 { 3184 Mat B; 3185 PetscErrorCode ierr; 3186 Mat_MUMPS *mumps; 3187 PetscBool isSeqSELL; 3188 PetscMPIInt size; 3189 3190 PetscFunctionBegin; 3191 /* Create the factorization matrix */ 3192 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);CHKERRQ(ierr); 3193 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3194 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3195 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 3196 ierr = MatSetUp(B);CHKERRQ(ierr); 3197 3198 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 3199 3200 B->ops->view = MatView_MUMPS; 3201 B->ops->getinfo = MatGetInfo_MUMPS; 3202 3203 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 3204 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 3205 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 3206 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 3207 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 3208 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 3209 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 3210 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 3211 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 3212 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 3213 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 3214 3215 if (ftype == MAT_FACTOR_LU) { 3216 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3217 B->factortype = MAT_FACTOR_LU; 3218 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 3219 else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 3220 mumps->sym = 0; 3221 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 3222 3223 /* set solvertype */ 3224 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 3225 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 3226 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 3227 if (size == 1) { 3228 /* MUMPS option can use ordering with "-mat_mumps_icntl_7 1 when sequential so generate the ordering, even if it may not be used */ 3229 B->useordering = PETSC_TRUE; 3230 } 3231 3232 B->ops->destroy = MatDestroy_MUMPS; 3233 B->data = (void*)mumps; 3234 3235 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 3236 3237 *F = B; 3238 PetscFunctionReturn(0); 3239 } 3240 3241 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 3242 { 3243 PetscErrorCode ierr; 3244 3245 PetscFunctionBegin; 3246 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 3247 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 3248 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 3249 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 3250 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 3251 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 3252 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 3253 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 3254 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 3255 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 3256 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);CHKERRQ(ierr); 3257 PetscFunctionReturn(0); 3258 } 3259 3260