1 2 /* 3 Provides an interface to the MUMPS sparse solver 4 */ 5 #include <petscpkg_version.h> 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 8 #include <../src/mat/impls/sell/mpi/mpisell.h> 9 10 EXTERN_C_BEGIN 11 #if defined(PETSC_USE_COMPLEX) 12 #if defined(PETSC_USE_REAL_SINGLE) 13 #include <cmumps_c.h> 14 #else 15 #include <zmumps_c.h> 16 #endif 17 #else 18 #if defined(PETSC_USE_REAL_SINGLE) 19 #include <smumps_c.h> 20 #else 21 #include <dmumps_c.h> 22 #endif 23 #endif 24 EXTERN_C_END 25 #define JOB_INIT -1 26 #define JOB_FACTSYMBOLIC 1 27 #define JOB_FACTNUMERIC 2 28 #define JOB_SOLVE 3 29 #define JOB_END -2 30 31 /* calls to MUMPS */ 32 #if defined(PETSC_USE_COMPLEX) 33 #if defined(PETSC_USE_REAL_SINGLE) 34 #define MUMPS_c cmumps_c 35 #else 36 #define MUMPS_c zmumps_c 37 #endif 38 #else 39 #if defined(PETSC_USE_REAL_SINGLE) 40 #define MUMPS_c smumps_c 41 #else 42 #define MUMPS_c dmumps_c 43 #endif 44 #endif 45 46 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for 47 number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the 48 naming convention in PetscMPIInt, PetscBLASInt etc. 49 */ 50 typedef MUMPS_INT PetscMUMPSInt; 51 52 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0) 53 #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */ 54 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 55 #endif 56 #else 57 #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */ 58 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out" 59 #endif 60 #endif 61 62 #define MPIU_MUMPSINT MPI_INT 63 #define PETSC_MUMPS_INT_MAX 2147483647 64 #define PETSC_MUMPS_INT_MIN -2147483648 65 66 /* Cast PetscInt to PetscMUMPSInt. 
   Usually there is no overflow since <a> is row/col indices or some small integers*/
PETSC_STATIC_INLINE PetscErrorCode PetscMUMPSIntCast(PetscInt a,PetscMUMPSInt *b)
{
  PetscFunctionBegin;
  /* The range check is compiled in only when PetscInt is 64-bit (a 32-bit PetscInt cannot
     overflow a MUMPS_INT) and, via PetscUnlikelyDebug, is active only in debug builds */
  if (PetscDefined(USE_64BIT_INDICES) && PetscUnlikelyDebug(a > PETSC_MUMPS_INT_MAX || a < PETSC_MUMPS_INT_MIN)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
  *b = (PetscMUMPSInt)(a);
  PetscFunctionReturn(0);
}

/* Put these utility routines here since they are only used in this file */

/* Read a PetscMUMPSInt from the options database, clamped to [lb,ub]; wraps PetscOptionsInt_Private */
PETSC_STATIC_INLINE PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject,const char opt[],const char text[],const char man[],PetscMUMPSInt currentvalue,PetscMUMPSInt *value,PetscBool *set,PetscMUMPSInt lb,PetscMUMPSInt ub)
{
  PetscErrorCode ierr;
  PetscInt       myval;
  PetscBool      myset;
  PetscFunctionBegin;
  /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
  ierr = PetscOptionsInt_Private(PetscOptionsObject,opt,text,man,(PetscInt)currentvalue,&myval,&myset,lb,ub);CHKERRQ(ierr);
  if (myset) {ierr = PetscMUMPSIntCast(myval,value);CHKERRQ(ierr);}
  if (set) *set = myset;
  PetscFunctionReturn(0);
}
/* Convenience wrapper; relies on a variable named PetscOptionsObject being in scope at the call site */
#define PetscOptionsMUMPSInt(a,b,c,d,e,f) PetscOptionsMUMPSInt_Private(PetscOptionsObject,a,b,c,d,e,f,PETSC_MUMPS_INT_MIN,PETSC_MUMPS_INT_MAX)

/* if using PETSc OpenMP support, we only call MUMPS on master ranks.
   Before/after the call, we change/restore CPUs the master ranks can run on */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
/* Invoke MUMPS on this rank's id structure; with petsc-omp support only omp masters call MUMPS,
   then broadcast the (global and first local) diagnostics to the non-master ranks.
   NOTE: the macro expands CHKERRQ and assigns into an `ierr` that must exist at the call site. */
#define PetscMUMPS_c(mumps) \
  do { \
    if (mumps->use_petsc_omp_support) { \
      if (mumps->is_omp_master) { \
        ierr = PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl);CHKERRQ(ierr); \
        MUMPS_c(&mumps->id); \
        ierr = PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl);CHKERRQ(ierr); \
      } \
      ierr = PetscOmpCtrlBarrier(mumps->omp_ctrl);CHKERRQ(ierr); \
      /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \
         to processes, so we only Bcast info[1], an error code and leave others (since they do not have \
         an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \
         omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \
      */ \
      ierr = MPI_Bcast(mumps->id.infog, 40,MPIU_MUMPSINT, 0,mumps->omp_comm);CHKERRQ(ierr); \
      ierr = MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL, 0,mumps->omp_comm);CHKERRQ(ierr); \
      ierr = MPI_Bcast(mumps->id.info, 1, MPIU_MUMPSINT, 0,mumps->omp_comm);CHKERRQ(ierr); \
    } else { \
      MUMPS_c(&mumps->id); \
    } \
  } while(0)
#else
/* No petsc-omp support: every rank calls MUMPS directly */
#define PetscMUMPS_c(mumps) \
  do { MUMPS_c(&mumps->id); } while (0)
#endif

/* declare MumpsScalar */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MumpsScalar mumps_complex
#else
#define MumpsScalar mumps_double_complex
#endif
#else
#define MumpsScalar PetscScalar
#endif

/* macros s.t.
   indices match MUMPS documentation */
#define ICNTL(I) icntl[(I)-1]
#define CNTL(I) cntl[(I)-1]
#define INFOG(I) infog[(I)-1]
#define INFO(I) info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I) rinfo[(I)-1]

/* Private data stored in Mat->data for a MUMPS-factored matrix */
typedef struct Mat_MUMPS Mat_MUMPS;
struct Mat_MUMPS {
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;
#else
  ZMUMPS_STRUC_C id;
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
#else
  DMUMPS_STRUC_C id;
#endif
#endif

  MatStructure   matstruc;
  PetscMPIInt    myid,petsc_size;
  PetscMUMPSInt  *irn,*jcn;          /* the (i,j,v) triplets passed to mumps. */
  PetscScalar    *val,*val_alloc;    /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
  PetscInt64     nnz;                /* number of nonzeros. The type is called selective 64-bit in mumps */
  PetscMUMPSInt  sym;
  MPI_Comm       mumps_comm;
  PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
#if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
  PetscMUMPSInt  lrhs_loc,nloc_rhs,*irhs_loc;
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  PetscInt       *rhs_nrow,max_nrhs;
  PetscMPIInt    *rhs_recvcounts,*rhs_disps;
  PetscScalar    *rhs_loc,*rhs_recvbuf;
#endif
#endif
  Vec            b_seq,x_seq;
  PetscInt       ninfo,*info;        /* which INFO to display */
  PetscInt       sizeredrhs;
  PetscScalar    *schur_sol;
  PetscInt       schur_sizesol;
  PetscMUMPSInt  *ia_alloc,*ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
  PetscInt64     cur_ilen,cur_jlen;   /* current len of ia_alloc[], ja_alloc[] */
  PetscErrorCode (*ConvertToTriples)(Mat,PetscInt,MatReuse,Mat_MUMPS*);

  /* stuff used by petsc/mumps OpenMP support*/
  PetscBool      use_petsc_omp_support;
  PetscOmpCtrl   omp_ctrl;           /* an OpenMP controller so that blocked
                                        processes will release their CPU (MPI_Barrier does not have this guarantee) */
  MPI_Comm       petsc_comm,omp_comm; /* petsc_comm is petsc matrix's comm */
  PetscInt64     *recvcount;          /* a collection of nnz on omp_master */
  PetscMPIInt    tag,omp_comm_size;
  PetscBool      is_omp_master;       /* is this rank the master of omp_comm */
  MPI_Request    *reqs;
};

/* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
   Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
 */
static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps,PetscInt nrow,PetscInt *ia,PetscInt *ja,PetscMUMPSInt **ia_mumps,PetscMUMPSInt **ja_mumps,PetscMUMPSInt *nnz_mumps)
{
  PetscErrorCode ierr;
  PetscInt       nnz=ia[nrow]-1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */

  PetscFunctionBegin;
#if defined(PETSC_USE_64BIT_INDICES)
  {
    PetscInt i;
    if (nrow+1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
      ierr = PetscFree(mumps->ia_alloc);CHKERRQ(ierr);
      ierr = PetscMalloc1(nrow+1,&mumps->ia_alloc);CHKERRQ(ierr);
      mumps->cur_ilen = nrow+1;
    }
    if (nnz > mumps->cur_jlen) {
      ierr = PetscFree(mumps->ja_alloc);CHKERRQ(ierr);
      ierr = PetscMalloc1(nnz,&mumps->ja_alloc);CHKERRQ(ierr);
      mumps->cur_jlen = nnz;
    }
    /* 64-bit PetscInt: narrow entries one by one into the (possibly regrown) work arrays */
    for (i=0; i<nrow+1; i++) {ierr = PetscMUMPSIntCast(ia[i],&(mumps->ia_alloc[i]));CHKERRQ(ierr);}
    for (i=0; i<nnz; i++) {ierr = PetscMUMPSIntCast(ja[i],&(mumps->ja_alloc[i]));CHKERRQ(ierr);}
    *ia_mumps = mumps->ia_alloc;
    *ja_mumps = mumps->ja_alloc;
  }
#else
  /* 32-bit PetscInt matches PetscMUMPSInt, so the input arrays can be passed through unchanged */
  *ia_mumps = ia;
  *ja_mumps = ja;
#endif
  ierr = PetscMUMPSIntCast(nnz,nnz_mumps);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Free all Schur-complement related storage and reset the corresponding MUMPS controls */
static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr =
PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr);
  ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
  ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
  mumps->id.size_schur = 0;
  mumps->id.schur_lld  = 0;
  mumps->id.ICNTL(19)  = 0; /* ICNTL(19)=0: no Schur complement is returned */
  PetscFunctionReturn(0);
}

/* solve with rhs in mumps->id.redrhs and return in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS            *mumps=(Mat_MUMPS*)F->data;
  Mat                  S,B,X;
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;
  PetscErrorCode       ierr;

  PetscFunctionBegin;
  ierr = MatFactorFactorizeSchurComplement(F);CHKERRQ(ierr);
  ierr = MatFactorGetSchurComplement(F,&S,&schurstatus);CHKERRQ(ierr);
  /* B wraps id.redrhs (no copy): the reduced right-hand sides, stored column-major by MUMPS */
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  ierr = MatBindToCPU(B,S->boundtocpu);CHKERRQ(ierr);
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    /* X wraps id.redrhs as well, so the triangular solves overwrite the rhs in place */
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);CHKERRQ(ierr);
    ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr);
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatMatSolveTranspose(S,B,X);CHKERRQ(ierr);
    } else {
      ierr = MatMatSolve(S,B,X);CHKERRQ(ierr);
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    /* S holds inv(S): the "solve" is a dense multiply, which cannot run in place,
       hence the separate schur_sol buffer, grown on demand and cached on the struct */
    sizesol = mumps->id.nrhs*mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
      ierr = PetscMalloc1(sizesol,&mumps->schur_sol);CHKERRQ(ierr);
      mumps->schur_sizesol = sizesol;
    }
    ierr =
MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);CHKERRQ(ierr);
    ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr);
#endif
    ierr = MatProductCreateWithMat(S,B,NULL,X);CHKERRQ(ierr);
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatProductSetType(X,MATPRODUCT_AtB);CHKERRQ(ierr);
    } else {
      ierr = MatProductSetType(X,MATPRODUCT_AB);CHKERRQ(ierr);
    }
    ierr = MatProductSetFromOptions(X);CHKERRQ(ierr);
    ierr = MatProductSymbolic(X);CHKERRQ(ierr);
    ierr = MatProductNumeric(X);CHKERRQ(ierr);

    /* copy the product back into id.redrhs (wrapped by B) where MUMPS expects the solution */
    ierr = MatCopy(X,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %D",F->schur_status);
    break;
  }
  ierr = MatFactorRestoreSchurComplement(F,&S,schurstatus);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = MatDestroy(&X);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Drive the two-phase Schur reduction around a MUMPS solve:
   expansion=PETSC_FALSE prepares the condensation step (allocate id.redrhs, set ICNTL(26)=1);
   expansion=PETSC_TRUE solves the Schur system ourselves and runs the MUMPS expansion step (ICNTL(26)=2) */
static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)F->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
    PetscFunctionReturn(0);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = mumps->id.size_schur;
      ierr = PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);CHKERRQ(ierr);
      mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs;
    }
    mumps->id.ICNTL(26) = 1; /* condensation
                                phase */
  } else { /* prepare for the expansion step */
    /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
    ierr = MatMumpsSolveSchur_Private(F);CHKERRQ(ierr);
    mumps->id.ICNTL(26) = 2; /* expansion phase */
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
    if (mumps->id.nrhs > 1) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = 0;
      mumps->sizeredrhs = 0;
    }
  }
  PetscFunctionReturn(0);
}

/*
  MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz]

  input:
    A       - matrix in aij,baij or sbaij format
    shift   - 0: C style output triple; 1: Fortran style output triple.
    reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
              MAT_REUSE_MATRIX: only the values in v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and col index, matrix values (matrix triples)

  The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
  freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means
  that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().

 */

/* SeqAIJ -> MUMPS assembled triples; values are used in place, only (i,j) arrays are built */
PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscScalar *av;
  const PetscInt    *ai,*aj,*ajj,M=A->rmap->n;
  PetscInt64        nz,rnz,i,j,k;
  PetscErrorCode    ierr;
  PetscMUMPSInt     *row,*col;
  Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;

  PetscFunctionBegin;
  ierr = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr);
  /* AIJ values already sit in the order the triples are produced, so no value buffer is needed */
  mumps->val = (PetscScalar*)av;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz;
    ai   = aa->i;
    aj   = aa->j;
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    for (i=k=0; i<M; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        ierr = PetscMUMPSIntCast(i+shift,&row[k]);CHKERRQ(ierr);
        ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[k]);CHKERRQ(ierr);
        k++;
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* SeqSELL -> MUMPS assembled triples; val points at the SELL value array in place */
PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  PetscErrorCode ierr;
  PetscInt64     nz,i,j,k,r;
  Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
  PetscMUMPSInt  *row,*col;

  PetscFunctionBegin;
  mumps->val = a->val;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = a->sliidx[a->totalslices];
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    /* SELL packs 8 rows per slice; r cycles 0..7 to recover the row within the slice */
    for (i=k=0; i<a->totalslices; i++) {
      for (j=a->sliidx[i],r=0; j<a->sliidx[i+1]; j++,r=((r+1)&0x07)) {
        ierr = PetscMUMPSIntCast(8*i+r+shift,&row[k++]);CHKERRQ(ierr);
      }
    }
    for (i=0;i<nz;i++) {ierr = PetscMUMPSIntCast(a->colidx[i]+shift,&col[i]);CHKERRQ(ierr);}
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscFunctionReturn(0);
}

/* SeqBAIJ -> MUMPS assembled triples; each bs x bs block is expanded into bs2 scalar entries */
PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  Mat_SeqBAIJ    *aa=(Mat_SeqBAIJ*)A->data;
  const PetscInt
 *ai,*aj,*ajj,bs2 = aa->bs2;
  PetscInt64     M,nz,idx=0,rnz,i,j,k,m;
  PetscInt       bs;
  PetscErrorCode ierr;
  PetscMUMPSInt  *row,*col;

  PetscFunctionBegin;
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  M    = A->rmap->N/bs;
  mumps->val = aa->a; /* block values are already contiguous in the order the indices are emitted */
  if (reuse == MAT_INITIAL_MATRIX) {
    ai   = aa->i; aj = aa->j;
    nz   = bs2*aa->nz;
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    /* expand each block: (m,j) walk matches the column-major layout of aa->a within a block */
    for (i=0; i<M; i++) {
      ajj = aj + ai[i];
      rnz = ai[i+1] - ai[i];
      for (k=0; k<rnz; k++) {
        for (j=0; j<bs; j++) {
          for (m=0; m<bs; m++) {
            ierr = PetscMUMPSIntCast(i*bs + m + shift,&row[idx]);CHKERRQ(ierr);
            ierr = PetscMUMPSIntCast(bs*ajj[k] + j + shift,&col[idx]);CHKERRQ(ierr);
            idx++;
          }
        }
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscFunctionReturn(0);
}

/* SeqSBAIJ -> MUMPS symmetric triples (upper triangle only) */
PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt *ai, *aj,*ajj;
  PetscInt       bs;
  PetscInt64     nz,rnz,i,j,k,m;
  PetscErrorCode ierr;
  PetscMUMPSInt  *row,*col;
  PetscScalar    *val;
  Mat_SeqSBAIJ   *aa=(Mat_SeqSBAIJ*)A->data;
  const PetscInt bs2=aa->bs2,mbs=aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool      hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
  if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
#endif
  ai   = aa->i;
  aj   = aa->j;
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz;
    ierr = PetscMalloc2(bs2*nz,&row,bs2*nz,&col);CHKERRQ(ierr);
    /* for bs>1 only part of each diagonal block belongs to the upper triangle, so the kept
       values must be compacted into a separate buffer; for bs==1 aa->a can be used in place */
    if (bs>1) {
      ierr = PetscMalloc1(bs2*nz,&mumps->val_alloc);CHKERRQ(ierr);
      mumps->val = mumps->val_alloc;
    } else {
      mumps->val = aa->a;
    }
    mumps->irn = row;
    mumps->jcn = col;
  } else {
    if
 (bs == 1) mumps->val = aa->a;
    row = mumps->irn;
    col = mumps->jcn;
  }
  val = mumps->val;

  nz = 0;
  if (bs>1) {
    for (i=0; i<mbs; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            /* off-diagonal blocks (ajj[j]>i) are kept whole; within a diagonal block only
               the upper triangle (k>=m) is emitted */
            if (ajj[j]>i || k>=m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                ierr = PetscMUMPSIntCast(i*bs + m + shift,&row[nz]);CHKERRQ(ierr);
                ierr = PetscMUMPSIntCast(ajj[j]*bs + k + shift,&col[nz]);CHKERRQ(ierr);
              }
              val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs];
            }
          }
        }
      }
    }
  } else if (reuse == MAT_INITIAL_MATRIX) {
    /* bs==1: values are used in place, so only the index arrays need filling */
    for (i=0; i<mbs; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
        ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
        nz++;
      }
    }
    if (nz != aa->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %D != %D",nz,aa->nz);
  }
  if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
  PetscFunctionReturn(0);
}

/* SeqAIJ -> MUMPS symmetric triples: extract the upper triangular part of a (structurally symmetric) AIJ matrix */
PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai,*aj,*ajj,*adiag,M=A->rmap->n;
  PetscInt64        nz,rnz,i,j;
  const PetscScalar *av,*v1;
  PetscScalar       *val;
  PetscErrorCode    ierr;
  PetscMUMPSInt     *row,*col;
  Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
  PetscBool         missing;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
  if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
#endif
  ierr  = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr);
  ai    = aa->i; aj = aa->j;
  adiag = aa->diag;
  ierr =
MatMissingDiagonal_SeqAIJ(A,&missing,NULL);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    /* count nz in the upper triangular part of A */
    nz = 0;
    if (missing) {
      /* some rows lack a stored diagonal: for those rows adiag[i] points at/after the row end,
         so the row must be scanned entry by entry for columns >= i */
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            nz++;
          }
        } else {
          nz += ai[i+1] - adiag[i];
        }
      }
    } else {
      /* every row has a diagonal: the upper triangle of row i is exactly [adiag[i], ai[i+1]) */
      for (i=0; i<M; i++) nz += ai[i+1] - adiag[i];
    }
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;

    /* second pass: fill indices and values with the same traversal as the count above */
    nz = 0;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
            ierr = PetscMUMPSIntCast(aj[j]+shift,&col[nz]);CHKERRQ(ierr);
            val[nz] = av[j];
            nz++;
          }
        } else {
          rnz = ai[i+1] - adiag[i];
          ajj = aj + adiag[i];
          v1  = av + adiag[i];
          for (j=0; j<rnz; j++) {
            ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
            ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i=0; i<M; i++) {
        rnz = ai[i+1] - adiag[i];
        ajj = aj + adiag[i];
        v1  = av + adiag[i];
        for (j=0; j<rnz; j++) {
          ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
          ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
          val[nz++] = v1[j];
        }
      }
    }
  } else {
    /* MAT_REUSE_MATRIX: indices are unchanged, refresh only the values in the same order */
    nz  = 0;
    val = mumps->val;
    if (missing) {
      for (i=0; i <M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            val[nz++] = av[j];
          }
        } else {
          rnz = ai[i+1] - adiag[i];
          v1  = av + adiag[i];
          for (j=0; j<rnz; j++) {
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for
 (i=0; i <M; i++) {
        rnz = ai[i+1] - adiag[i];
        v1  = av + adiag[i];
        for (j=0; j<rnz; j++) {
          val[nz++] = v1[j];
        }
      }
    }
  }
  ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* MPISBAIJ -> distributed MUMPS symmetric triples: local diagonal (A) and off-diagonal (B) parts,
   global indices via rstart and garray */
PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  PetscErrorCode    ierr;
  const PetscInt    *ai,*aj,*bi,*bj,*garray,*ajj,*bjj;
  PetscInt          bs;
  PetscInt64        rstart,nz,i,j,k,m,jj,irow,countA,countB;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av,*bv,*v1,*v2;
  PetscScalar       *val;
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ*)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
  const PetscInt    bs2=aa->bs2,mbs=aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
  if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
#endif
  ierr   = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  rstart = A->rmap->rstart;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;
  av = aa->a;
  bv = bb->a;

  garray = mat->garray; /* local off-diagonal column -> global column map */

  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = (aa->nz+bb->nz)*bs2; /* just a conservative estimate */
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
    /* can not decide the exact mumps->nnz now because of the SBAIJ */
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<mbs; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i]*bs2;
    v2     = bv + bi[i]*bs2;

    if (bs>1) {
      /* A-part: off-diagonal blocks kept whole, diagonal blocks restricted to their upper triangle (k>=m) */
      for (j=0; j<countA; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            if (rstart + ajj[j]*bs>irow || k>=m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                ierr = PetscMUMPSIntCast(irow + m + shift,&row[jj]);CHKERRQ(ierr);
                ierr = PetscMUMPSIntCast(rstart + ajj[j]*bs + k + shift,&col[jj]);CHKERRQ(ierr);
              }
              val[jj++] = v1[j*bs2 + m + k*bs];
            }
          }
        }
      }

      /* B-part: always strictly off the diagonal block, kept whole */
      for (j=0; j < countB; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            if (reuse == MAT_INITIAL_MATRIX) {
              ierr = PetscMUMPSIntCast(irow + m + shift,&row[jj]);CHKERRQ(ierr);
              ierr = PetscMUMPSIntCast(garray[bjj[j]]*bs + k + shift,&col[jj]);CHKERRQ(ierr);
            }
            val[jj++] = v2[j*bs2 + m + k*bs];
          }
        }
      }
    } else {
      /* A-part */
      for (j=0; j<countA; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
          ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr);
        }
        val[jj++] = v1[j];
      }

      /* B-part */
      for (j=0; j < countB; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
          ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr);
        }
        val[jj++] = v2[j];
      }
    }
    irow+=bs;
  }
  /* the exact count is only known after the upper-triangle filtering above */
  mumps->nnz = jj;
  PetscFunctionReturn(0);
}

/* MPIAIJ -> distributed MUMPS assembled triples: local diagonal (Ad) and off-diagonal (Ao) parts */
PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt64        rstart,nz,i,j,jj,irow,countA,countB;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av, *bv,*v1,*v2;
  PetscScalar       *val;
  Mat               Ad,Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;

  PetscFunctionBegin;
  ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
  ierr =
MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr);

  aa = (Mat_SeqAIJ*)(Ad)->data;
  bb = (Mat_SeqAIJ*)(Ao)->data;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part: columns are local, offset by rstart to become global */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
        ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr);
      }
      val[jj++] = v1[j];
    }

    /* B-part: columns are mapped to global through garray */
    for (j=0; j < countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
        ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr);
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* MPIBAIJ -> distributed MUMPS assembled triples; blocks are expanded into scalar entries */
PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  Mat_MPIBAIJ       *mat    = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ       *aa     = (Mat_SeqBAIJ*)(mat->A)->data;
  Mat_SeqBAIJ       *bb     = (Mat_SeqBAIJ*)(mat->B)->data;
  const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj;
  const PetscInt    *garray =
 mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart;
  const PetscInt    bs2=mat->bs2;
  PetscErrorCode    ierr;
  PetscInt          bs;
  PetscInt64        nz,i,j,k,n,jj,irow,countA,countB,idx;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2;
  PetscScalar       *val;

  PetscFunctionBegin;
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = bs2*(aa->nz + bb->nz);
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<mbs; i++) {
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    ajj    = aj + ai[i];
    bjj    = bj + bi[i];
    v1     = av + bs2*ai[i];
    v2     = bv + bs2*bi[i];

    idx = 0;
    /* A-part: expand each block; (n,j) walk matches the column-major block layout of v1 */
    for (k=0; k<countA; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            ierr = PetscMUMPSIntCast(irow + n + shift,&row[jj]);CHKERRQ(ierr);
            ierr = PetscMUMPSIntCast(rstart + bs*ajj[k] + j + shift,&col[jj]);CHKERRQ(ierr);
          }
          val[jj++] = v1[idx++];
        }
      }
    }

    idx = 0;
    /* B-part: same expansion, columns mapped to global through garray */
    for (k=0; k<countB; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            ierr = PetscMUMPSIntCast(irow + n + shift,&row[jj]);CHKERRQ(ierr);
            ierr = PetscMUMPSIntCast(bs*garray[bjj[k]] + j + shift,&col[jj]);CHKERRQ(ierr);
          }
          val[jj++] = v2[idx++];
        }
      }
    }
    irow += bs;
  }
  PetscFunctionReturn(0);
}

/* MPIAIJ -> distributed MUMPS symmetric triples: upper triangular part of a structurally symmetric MPIAIJ matrix */
PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt64        rstart,nz,nza,nzb,i,j,jj,irow,countA,countB;
  PetscMUMPSInt     *row,*col;
  const
 PetscScalar *av, *bv,*v1,*v2;
  PetscScalar       *val;
  Mat               Ad,Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
  if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
#endif
  ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr);

  aa    = (Mat_SeqAIJ*)(Ad)->data;
  bb    = (Mat_SeqAIJ*)(Ao)->data;
  ai    = aa->i;
  aj    = aa->j;
  adiag = aa->diag;
  bi    = bb->i;
  bj    = bb->j;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
    nzb = 0; /* num of upper triangular entries in mat->B */
    for (i=0; i<m; i++) {
      nza   += (ai[i+1] - adiag[i]);
      countB = bi[i+1] - bi[i];
      bjj    = bj + bi[i];
      for (j=0; j<countB; j++) {
        /* off-diagonal-block columns lie entirely outside [rstart, rstart+m), so ">" keeps
           exactly the columns to the right of the diagonal block */
        if (garray[bjj[j]] > rstart) nzb++;
      }
    }

    nz   = nza + nzb; /* total nz of upper triangular part of mat */
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
    v1     = av + adiag[i];
    countA = ai[i+1] - adiag[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v2     = bv + bi[i];

    /* A-part: entries from the diagonal to the row end, i.e. the local upper triangle */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
        ierr = PetscMUMPSIntCast(rstart +
ajj[j] + shift,&col[jj]);CHKERRQ(ierr); 960 } 961 val[jj++] = v1[j]; 962 } 963 964 /* B-part */ 965 for (j=0; j < countB; j++) { 966 if (garray[bjj[j]] > rstart) { 967 if (reuse == MAT_INITIAL_MATRIX) { 968 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 969 ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr); 970 } 971 val[jj++] = v2[j]; 972 } 973 } 974 irow++; 975 } 976 ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr); 977 ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr); 978 PetscFunctionReturn(0); 979 } 980 981 PetscErrorCode MatDestroy_MUMPS(Mat A) 982 { 983 PetscErrorCode ierr; 984 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 985 986 PetscFunctionBegin; 987 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 988 ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr); 989 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 990 ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr); 991 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 992 ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr); 993 ierr = PetscFree2(mumps->irn,mumps->jcn);CHKERRQ(ierr); 994 ierr = PetscFree(mumps->val_alloc);CHKERRQ(ierr); 995 ierr = PetscFree(mumps->info);CHKERRQ(ierr); 996 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 997 mumps->id.job = JOB_END; 998 PetscMUMPS_c(mumps); 999 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1000 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1001 if (mumps->use_petsc_omp_support) { 1002 ierr = PetscOmpCtrlDestroy(&mumps->omp_ctrl);CHKERRQ(ierr); 1003 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0) 1004 ierr = PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);CHKERRQ(ierr); 1005 ierr = PetscFree3(mumps->rhs_nrow,mumps->rhs_recvcounts,mumps->rhs_disps);CHKERRQ(ierr); 1006 #endif 1007 } 1008 #endif 1009 ierr = PetscFree(mumps->ia_alloc);CHKERRQ(ierr); 1010 ierr = PetscFree(mumps->ja_alloc);CHKERRQ(ierr); 1011 
ierr = PetscFree(mumps->recvcount);CHKERRQ(ierr);
  ierr = PetscFree(mumps->reqs);CHKERRQ(ierr);
#if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
  ierr = PetscFree(mumps->irhs_loc);CHKERRQ(ierr);
#endif
  ierr = PetscFree(A->data);CHKERRQ(ierr);

  /* clear composed functions */
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
/* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points
   to the start of the RHS on the local processor. With PETSc OpenMP support the RHS is
   gathered onto the omp master rank first; without it each rank hands its local piece to
   MUMPS directly. irhs_loc[] carries 1-based global row indices as MUMPS expects. */
static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A,PetscInt nrhs,const PetscScalar *array)
{
  PetscErrorCode    ierr;
  Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
  const PetscMPIInt ompsize=mumps->omp_comm_size;
  PetscInt          i,m,M,rstart;

  PetscFunctionBegin;
  ierr = MatGetSize(A,&M,NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
  if (M > PETSC_MUMPS_INT_MAX) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
  if (ompsize == 1) {
    if (!mumps->irhs_loc) {
      mumps->nloc_rhs = m;
      ierr = PetscMalloc1(m,&mumps->irhs_loc);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
      for (i=0; i<m; i++) mumps->irhs_loc[i] = rstart+i+1; /* use 1-based indices */
    }
    mumps->id.rhs_loc = (MumpsScalar*)array;
  } else {
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
    const PetscInt *ranges;
    PetscMPIInt    j,k,sendcount,*petsc_ranks,*omp_ranks;
    MPI_Group      petsc_group,omp_group;
    PetscScalar    *recvbuf=NULL;

    if (mumps->is_omp_master) {
      /* Lazily initialize the omp stuff for distributed rhs */
      if (!mumps->irhs_loc) {
        ierr = PetscMalloc2(ompsize,&omp_ranks,ompsize,&petsc_ranks);CHKERRQ(ierr);
        ierr = PetscMalloc3(ompsize,&mumps->rhs_nrow,ompsize,&mumps->rhs_recvcounts,ompsize,&mumps->rhs_disps);CHKERRQ(ierr);
        /* Translate omp_comm ranks to petsc_comm ranks so we can look up each
           member's ownership range on the PETSc communicator */
        ierr = MPI_Comm_group(mumps->petsc_comm,&petsc_group);CHKERRQ(ierr);
        ierr = MPI_Comm_group(mumps->omp_comm,&omp_group);CHKERRQ(ierr);
        for (j=0; j<ompsize; j++) omp_ranks[j] = j;
        ierr = MPI_Group_translate_ranks(omp_group,ompsize,omp_ranks,petsc_group,petsc_ranks);CHKERRQ(ierr);

        /* Populate mumps->irhs_loc[], rhs_nrow[] */
        mumps->nloc_rhs = 0;
        ierr = MatGetOwnershipRanges(A,&ranges);CHKERRQ(ierr);
        for (j=0; j<ompsize; j++) {
          mumps->rhs_nrow[j] = ranges[petsc_ranks[j]+1] - ranges[petsc_ranks[j]];
          mumps->nloc_rhs   += mumps->rhs_nrow[j];
        }
        ierr = PetscMalloc1(mumps->nloc_rhs,&mumps->irhs_loc);CHKERRQ(ierr);
        for (j=k=0; j<ompsize; j++) {
          for (i=ranges[petsc_ranks[j]]; i<ranges[petsc_ranks[j]+1]; i++,k++) mumps->irhs_loc[k] = i+1; /* uses 1-based indices */
        }

        ierr = PetscFree2(omp_ranks,petsc_ranks);CHKERRQ(ierr);
        ierr = MPI_Group_free(&petsc_group);CHKERRQ(ierr);
        ierr = MPI_Group_free(&omp_group);CHKERRQ(ierr);
      }

      /* Realloc buffers when current nrhs is bigger than what we have met */
      if (nrhs > mumps->max_nrhs) {
        ierr = PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);CHKERRQ(ierr);
        ierr = PetscMalloc2(mumps->nloc_rhs*nrhs,&mumps->rhs_loc,mumps->nloc_rhs*nrhs,&mumps->rhs_recvbuf);CHKERRQ(ierr);
        mumps->max_nrhs = nrhs;
      }

      /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
      for (j=0; j<ompsize; j++) {ierr = PetscMPIIntCast(mumps->rhs_nrow[j]*nrhs,&mumps->rhs_recvcounts[j]);CHKERRQ(ierr);}
      mumps->rhs_disps[0] = 0;
      for (j=1; j<ompsize; j++) {
        mumps->rhs_disps[j] = mumps->rhs_disps[j-1] + mumps->rhs_recvcounts[j-1];
        if (mumps->rhs_disps[j] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscMPIInt overflow!");
      }
      recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf.
Single rhs is common in Ax=b */
    }

    ierr = PetscMPIIntCast(m*nrhs,&sendcount);CHKERRQ(ierr);
    ierr = MPI_Gatherv(array,sendcount,MPIU_SCALAR,recvbuf,mumps->rhs_recvcounts,mumps->rhs_disps,MPIU_SCALAR,0,mumps->omp_comm);CHKERRQ(ierr);

    if (mumps->is_omp_master) {
      if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
        PetscScalar *dst,*dstbase = mumps->rhs_loc;
        for (j=0; j<ompsize; j++) {
          const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
          dst = dstbase;
          for (i=0; i<nrhs; i++) {
            ierr = PetscArraycpy(dst,src,mumps->rhs_nrow[j]);CHKERRQ(ierr);
            src += mumps->rhs_nrow[j];
            dst += mumps->nloc_rhs;
          }
          dstbase += mumps->rhs_nrow[j];
        }
      }
      mumps->id.rhs_loc = (MumpsScalar*)mumps->rhs_loc;
    }
#endif /* PETSC_HAVE_OPENMP_SUPPORT */
  }
  mumps->id.nrhs     = nrhs;
  mumps->id.nloc_rhs = mumps->nloc_rhs;
  mumps->id.lrhs_loc = mumps->nloc_rhs;
  mumps->id.irhs_loc = mumps->irhs_loc;
  PetscFunctionReturn(0);
}
#endif /* PETSC_PKG_MUMPS_VERSION_GE(5,3,0) */

/* Solve A x = b with the MUMPS factor (JOB_SOLVE), handling distributed or
   centralized RHS depending on the MUMPS version, the optional Schur-complement
   condensation/expansion, and the scatter of MUMPS' distributed solution back
   into the PETSc vector x. */
PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
#if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
  const PetscScalar *rarray;
#endif
  PetscScalar       *array;
  IS                is_iden,is_petsc;
  PetscErrorCode    ierr;
  PetscInt          i;
  PetscBool         second_solve = PETSC_FALSE;
  static PetscBool  cite1 = PETSC_FALSE,cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1);CHKERRQ(ierr);
  ierr = PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",&cite2);CHKERRQ(ierr);

  if (A->factorerrortype) {
    /* Factorization previously failed; return x = inf rather than solving */
    ierr = PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    ierr = VecSetInf(x);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  mumps->id.nrhs = 1;
  if (mumps->petsc_size > 1) {
#if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
    mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
    ierr = VecGetArrayRead(b,&rarray);CHKERRQ(ierr);
    ierr = MatMumpsSetUpDistRHSInfo(A,1,rarray);CHKERRQ(ierr);
#else
    mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a seqential rhs vector*/
    ierr = VecScatterBegin(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mumps->myid) {
      ierr = VecGetArray(mumps->b_seq,&array);CHKERRQ(ierr);
      mumps->id.rhs = (MumpsScalar*)array;
    }
#endif
  } else {  /* petsc_size == 1 */
    mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
    /* MUMPS solves in place: copy b into x and hand x's array to MUMPS */
    ierr = VecCopy(b,x);CHKERRQ(ierr);
    ierr = VecGetArray(x,&array);CHKERRQ(ierr);
    mumps->id.rhs = (MumpsScalar*)array;
  }

  /*
     handle condensation step of Schur complement (if any)
     We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
     According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase
     Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
     This requires an extra call to PetscMUMPS_c and the computation of the factors for S
  */
  if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
    if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n");
    second_solve = PETSC_TRUE;
    ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
  }
  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

  /* handle expansion step of Schur complement (if any) */
  if (second_solve) {
    ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
  }

  if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
    if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
      /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */
      ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
    }
    if (!mumps->scat_sol) { /* create scatter scat_sol */
      PetscInt *isol2_loc=NULL;
      ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */
      ierr = PetscMalloc1(mumps->id.lsol_loc,&isol2_loc);CHKERRQ(ierr);
      for (i=0; i<mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i]-1; /* change Fortran style to C style */
      ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,isol2_loc,PETSC_OWN_POINTER,&is_petsc);CHKERRQ(ierr); /* to */
      ierr =
VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr);
      ierr = ISDestroy(&is_iden);CHKERRQ(ierr);
      ierr = ISDestroy(&is_petsc);CHKERRQ(ierr);
      mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
    }

    ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }

  if (mumps->petsc_size > 1) {
#if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
    ierr = VecRestoreArrayRead(b,&rarray);CHKERRQ(ierr);
#else
    if (!mumps->myid) {ierr = VecRestoreArray(mumps->b_seq,&array);CHKERRQ(ierr);}
#endif
  } else {ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);}

  ierr = PetscLogFlops(2.0*mumps->id.RINFO(3));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Solve A^T x = b by toggling MUMPS' ICNTL(9) (1 = A, 0 = A^T) around a regular
   MatSolve_MUMPS() call, then restoring it. */
PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mumps->id.ICNTL(9) = 0;
  ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr);
  mumps->id.ICNTL(9) = 1;
  PetscFunctionReturn(0);
}

/* Solve A X = B for a dense X. B may be dense, or the transpose wrapper of a
   sparse AIJ matrix (MUMPS wants sparse RHS in compressed-column form; see
   MatMatTransposeSolve_MUMPS()). Sequential and parallel cases are handled
   separately; MUMPS versions before 5.3.0 require a centralized dense RHS. */
PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X)
{
  PetscErrorCode    ierr;
  Mat               Bt = NULL;
  PetscBool         denseX,denseB,flg,flgT;
  Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
  PetscInt          i,nrhs,M;
  PetscScalar       *array;
  const PetscScalar *rbray;
  PetscInt          lsol_loc,nlsol_loc,*idxx,iidx = 0;
  PetscMUMPSInt     *isol_loc,*isol_loc_save;
  PetscScalar       *bray,*sol_loc,*sol_loc_save;
  IS                is_to,is_from;
  PetscInt          k,proc,j,m,myrstart;
  const PetscInt    *rstart;
  Vec               v_mpi,msol_loc;
  VecScatter        scat_sol;
#if PETSC_PKG_MUMPS_VERSION_LT(5,3,0)
  Vec               b_seq;
  VecScatter        scat_rhs;
#endif
  PetscScalar       *aa;
  PetscInt          spnr,*ia,*ja;
  Mat_MPIAIJ        *b = NULL;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompareAny((PetscObject)X,&denseX,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (!denseX) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");

  ierr = PetscObjectTypeCompareAny((PetscObject)B,&denseB,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (denseB) {
    if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution");
    mumps->id.ICNTL(20)= 0; /* dense RHS */
  } else { /* sparse B */
    if (X == B) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_IDN,"X and B must be different matrices");
    ierr = PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);CHKERRQ(ierr);
    if (flgT) { /* input B is transpose of actural RHS matrix,
                   because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
      ierr = MatTransposeGetMat(B,&Bt);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix");
    mumps->id.ICNTL(20)= 1; /* sparse RHS */
  }

  ierr = MatGetSize(B,&M,&nrhs);CHKERRQ(ierr);
  mumps->id.nrhs = nrhs;
  mumps->id.lrhs = M;
  mumps->id.rhs  = NULL;

  if (mumps->petsc_size == 1) {
    PetscScalar *aa;
    PetscInt    spnr,*ia,*ja;
    PetscBool   second_solve = PETSC_FALSE;

    /* MUMPS overwrites the RHS with the solution, so X's array doubles as the RHS buffer */
    ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
    mumps->id.rhs = (MumpsScalar*)array;

    if (denseB) {
      /* copy B to X */
      ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr);
      ierr = PetscArraycpy(array,rbray,M*nrhs);CHKERRQ(ierr);
      ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr);
    } else { /* sparse B */
      ierr = MatSeqAIJGetArray(Bt,&aa);CHKERRQ(ierr);
      ierr = MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr);
      mumps->id.rhs_sparse = (MumpsScalar*)aa;
    }
    /* handle condensation step of Schur complement (if any) */
    if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
      second_solve = PETSC_TRUE;
      ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
    }
    /* solve phase */
    /*-------------*/
    mumps->id.job = JOB_SOLVE;
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

    /* handle expansion step of Schur complement (if any) */
    if (second_solve) {
      ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
    }
    if (!denseB) { /* sparse B */
      ierr = MatSeqAIJRestoreArray(Bt,&aa);CHKERRQ(ierr);
      ierr = MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
    ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /*--------- parallel case: MUMPS requires rhs B to be centralized on the host!
--------*/
  if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n");

  /* create msol_loc to hold mumps local solution */
  isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
  sol_loc_save  = (PetscScalar*)mumps->id.sol_loc;

  lsol_loc  = mumps->id.lsol_loc;
  nlsol_loc = nrhs*lsol_loc;     /* length of sol_loc */
  ierr = PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);CHKERRQ(ierr);
  mumps->id.sol_loc  = (MumpsScalar*)sol_loc;
  mumps->id.isol_loc = isol_loc;

  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);CHKERRQ(ierr);

  if (denseB) {
#if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
    mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
    ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr);
    ierr = MatMumpsSetUpDistRHSInfo(A,nrhs,rbray);CHKERRQ(ierr);
    ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr);
    ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr);
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,NULL,&v_mpi);CHKERRQ(ierr);
#else
    /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in
       very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank
       0, re-arrange B into desired order, which is a local operation.
    */

    /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
    /* wrap dense rhs matrix B into a vector v_mpi */
    ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr);
    ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr);
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr);

    /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
    if (!mumps->myid) {
      PetscInt *idx;
      /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
      ierr = PetscMalloc1(nrhs*M,&idx);CHKERRQ(ierr);
      ierr = MatGetOwnershipRanges(B,&rstart);CHKERRQ(ierr);
      k = 0;
      for (proc=0; proc<mumps->petsc_size; proc++){
        for (j=0; j<nrhs; j++){
          for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i;
        }
      }

      ierr = VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);CHKERRQ(ierr);
      ierr = ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);CHKERRQ(ierr);
    } else {
      ierr = VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);CHKERRQ(ierr);
    }
    ierr = VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);CHKERRQ(ierr);
    ierr = VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = ISDestroy(&is_to);CHKERRQ(ierr);
    ierr = ISDestroy(&is_from);CHKERRQ(ierr);
    ierr = VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

    if (!mumps->myid) { /* define rhs on the host */
      ierr = VecGetArray(b_seq,&bray);CHKERRQ(ierr);
      mumps->id.rhs = (MumpsScalar*)bray;
      ierr = VecRestoreArray(b_seq,&bray);CHKERRQ(ierr);
    }
#endif
  } else { /* sparse B */
    b = (Mat_MPIAIJ*)Bt->data;

    /* wrap dense X into a vector v_mpi */
    ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr);
    ierr = MatDenseGetArray(X,&bray);CHKERRQ(ierr);
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(X,&bray);CHKERRQ(ierr);

    if (!mumps->myid) {
      /* MUMPS reads the sparse RHS only on the host rank */
      ierr = MatSeqAIJGetArray(b->A,&aa);CHKERRQ(ierr);
      ierr = MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr);
      mumps->id.rhs_sparse = (MumpsScalar*)aa;
    } else {
      mumps->id.irhs_ptr    = NULL;
      mumps->id.irhs_sparse = NULL;
      mumps->id.nz_rhs      = 0;
      mumps->id.rhs_sparse  = NULL;
    }
  }

  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

  /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
  ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
  ierr = VecPlaceArray(v_mpi,array);CHKERRQ(ierr);

  /* create scatter scat_sol */
  ierr = MatGetOwnershipRanges(X,&rstart);CHKERRQ(ierr);
  /* iidx: index for scatter mumps solution to petsc X */

  ierr = ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);CHKERRQ(ierr);
  ierr = PetscMalloc1(nlsol_loc,&idxx);CHKERRQ(ierr);
  for (i=0; i<lsol_loc; i++) {
    isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */

    /* locate the owning process of this solution row to compute its index in X */
    for (proc=0; proc<mumps->petsc_size; proc++){
      if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) {
        myrstart = rstart[proc];
        k        = isol_loc[i] - myrstart;        /* local index on 1st column of petsc vector X */
        iidx     = k + myrstart*nrhs;             /* maps mumps isol_loc[i] to petsc index in X */
        m        = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */
        break;
      }
    }

    for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m;
  }
  ierr = ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr);
  ierr = VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);CHKERRQ(ierr);
  ierr = VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = ISDestroy(&is_from);CHKERRQ(ierr);
  ierr = ISDestroy(&is_to);CHKERRQ(ierr);
  ierr = VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);

  /* free spaces */
  mumps->id.sol_loc  = (MumpsScalar*)sol_loc_save;
  mumps->id.isol_loc = isol_loc_save;

  ierr = PetscFree2(sol_loc,isol_loc);CHKERRQ(ierr);
  ierr = PetscFree(idxx);CHKERRQ(ierr);
  ierr = VecDestroy(&msol_loc);CHKERRQ(ierr);
  ierr = VecDestroy(&v_mpi);CHKERRQ(ierr);
  if (!denseB) {
    if (!mumps->myid) {
      b = (Mat_MPIAIJ*)Bt->data;
      ierr = MatSeqAIJRestoreArray(b->A,&aa);CHKERRQ(ierr);
      ierr = MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
  } else {
#if PETSC_PKG_MUMPS_VERSION_LT(5,3,0)
    ierr = VecDestroy(&b_seq);CHKERRQ(ierr);
    ierr = VecScatterDestroy(&scat_rhs);CHKERRQ(ierr);
#endif
  }
  ierr =
VecScatterDestroy(&scat_sol);CHKERRQ(ierr); 1494 ierr = PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));CHKERRQ(ierr); 1495 PetscFunctionReturn(0); 1496 } 1497 1498 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X) 1499 { 1500 PetscErrorCode ierr; 1501 PetscBool flg; 1502 Mat B; 1503 1504 PetscFunctionBegin; 1505 ierr = PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 1506 if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix"); 1507 1508 /* Create B=Bt^T that uses Bt's data structure */ 1509 ierr = MatCreateTranspose(Bt,&B);CHKERRQ(ierr); 1510 1511 ierr = MatMatSolve_MUMPS(A,B,X);CHKERRQ(ierr); 1512 ierr = MatDestroy(&B);CHKERRQ(ierr); 1513 PetscFunctionReturn(0); 1514 } 1515 1516 #if !defined(PETSC_USE_COMPLEX) 1517 /* 1518 input: 1519 F: numeric factor 1520 output: 1521 nneg: total number of negative pivots 1522 nzero: total number of zero pivots 1523 npos: (global dimension of F) - nneg - nzero 1524 */ 1525 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos) 1526 { 1527 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1528 PetscErrorCode ierr; 1529 PetscMPIInt size; 1530 1531 PetscFunctionBegin; 1532 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRQ(ierr); 1533 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1534 if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. 
-mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",mumps->id.INFOG(13)); 1535 1536 if (nneg) *nneg = mumps->id.INFOG(12); 1537 if (nzero || npos) { 1538 if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1539 if (nzero) *nzero = mumps->id.INFOG(28); 1540 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1541 } 1542 PetscFunctionReturn(0); 1543 } 1544 #endif 1545 1546 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps) 1547 { 1548 PetscErrorCode ierr; 1549 PetscInt i,nreqs; 1550 PetscMUMPSInt *irn,*jcn; 1551 PetscMPIInt count; 1552 PetscInt64 totnnz,remain; 1553 const PetscInt osize=mumps->omp_comm_size; 1554 PetscScalar *val; 1555 1556 PetscFunctionBegin; 1557 if (osize > 1) { 1558 if (reuse == MAT_INITIAL_MATRIX) { 1559 /* master first gathers counts of nonzeros to receive */ 1560 if (mumps->is_omp_master) {ierr = PetscMalloc1(osize,&mumps->recvcount);CHKERRQ(ierr);} 1561 ierr = MPI_Gather(&mumps->nnz,1,MPIU_INT64,mumps->recvcount,1,MPIU_INT64,0/*master*/,mumps->omp_comm);CHKERRQ(ierr); 1562 1563 /* Then each computes number of send/recvs */ 1564 if (mumps->is_omp_master) { 1565 /* Start from 1 since self communication is not done in MPI */ 1566 nreqs = 0; 1567 for (i=1; i<osize; i++) nreqs += (mumps->recvcount[i]+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX; 1568 } else { 1569 nreqs = (mumps->nnz+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX; 1570 } 1571 ierr = PetscMalloc1(nreqs*3,&mumps->reqs);CHKERRQ(ierr); /* Triple the requests since we send irn, jcn and val seperately */ 1572 1573 /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 1574 MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 1575 might be a prime number > 2^31-1, we have to slice the message. 
Note omp_comm_size
         is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
      */
      nreqs = 0; /* counter for actual send/recvs */
      if (mumps->is_omp_master) {
        for (i=0,totnnz=0; i<osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
        ierr = PetscMalloc2(totnnz,&irn,totnnz,&jcn);CHKERRQ(ierr);
        ierr = PetscMalloc1(totnnz,&val);CHKERRQ(ierr);

        /* Self communication */
        ierr = PetscArraycpy(irn,mumps->irn,mumps->nnz);CHKERRQ(ierr);
        ierr = PetscArraycpy(jcn,mumps->jcn,mumps->nnz);CHKERRQ(ierr);
        ierr = PetscArraycpy(val,mumps->val,mumps->nnz);CHKERRQ(ierr);

        /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
        ierr = PetscFree2(mumps->irn,mumps->jcn);CHKERRQ(ierr);
        ierr = PetscFree(mumps->val_alloc);CHKERRQ(ierr);
        mumps->nnz = totnnz;
        mumps->irn = irn;
        mumps->jcn = jcn;
        mumps->val = mumps->val_alloc = val;

        irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
        jcn += mumps->recvcount[0];
        val += mumps->recvcount[0];

        /* Remote communication: post sliced Irecvs, each at most PETSC_MPI_INT_MAX entries */
        for (i=1; i<osize; i++) {
          count  = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
          remain = mumps->recvcount[i] - count;
          while (count>0) {
            ierr    = MPI_Irecv(irn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
            ierr    = MPI_Irecv(jcn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
            ierr    = MPI_Irecv(val,count,MPIU_SCALAR, i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
            irn    += count;
            jcn    += count;
            val    += count;
            count   = PetscMin(remain,PETSC_MPI_INT_MAX);
            remain -= count;
          }
        }
      } else {
        /* Non-master ranks send their triples to the master, also in slices */
        irn    = mumps->irn;
        jcn    = mumps->jcn;
        val    = mumps->val;
        count  = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
        remain = mumps->nnz - count;
while (count>0) {
          ierr    = MPI_Isend(irn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
          ierr    = MPI_Isend(jcn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
          ierr    = MPI_Isend(val,count,MPIU_SCALAR, 0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
          irn    += count;
          jcn    += count;
          val    += count;
          count   = PetscMin(remain,PETSC_MPI_INT_MAX);
          remain -= count;
        }
      }
    } else { /* MAT_REUSE_MATRIX: indices are already in place; only re-gather val[] */
      nreqs = 0;
      if (mumps->is_omp_master) {
        val = mumps->val + mumps->recvcount[0];
        for (i=1; i<osize; i++) { /* Remote communication only since self data is already in place */
          count  = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
          remain = mumps->recvcount[i] - count;
          while (count>0) {
            ierr    = MPI_Irecv(val,count,MPIU_SCALAR,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
            val    += count;
            count   = PetscMin(remain,PETSC_MPI_INT_MAX);
            remain -= count;
          }
        }
      } else {
        val    = mumps->val;
        count  = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
        remain = mumps->nnz - count;
        while (count>0) {
          ierr    = MPI_Isend(val,count,MPIU_SCALAR,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
          val    += count;
          count   = PetscMin(remain,PETSC_MPI_INT_MAX);
          remain -= count;
        }
      }
    }
    ierr = MPI_Waitall(nreqs,mumps->reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat_MUMPS      *mumps =(Mat_MUMPS*)(F)->data;
  PetscErrorCode ierr;
  PetscBool      isMPIAIJ;

  PetscFunctionBegin;
  /* NOTE(review): `INFOG(1) == -16 && INFOG(1) == 0` can never both hold, so the
     negated clause is always true; the second operand was presumably meant to be
     INFO(2) == 0 -- verify against the MUMPS error-code documentation before changing */
  if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) {
    if (mumps->id.INFOG(1) == -6) {
      ierr =
PetscInfo2(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1675 } 1676 ierr = PetscInfo2(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1677 PetscFunctionReturn(0); 1678 } 1679 1680 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps);CHKERRQ(ierr); 1681 ierr = MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);CHKERRQ(ierr); 1682 1683 /* numerical factorization phase */ 1684 /*-------------------------------*/ 1685 mumps->id.job = JOB_FACTNUMERIC; 1686 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1687 if (!mumps->myid) { 1688 mumps->id.a = (MumpsScalar*)mumps->val; 1689 } 1690 } else { 1691 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1692 } 1693 PetscMUMPS_c(mumps); 1694 if (mumps->id.INFOG(1) < 0) { 1695 if (A->erroriffailure) { 1696 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 1697 } else { 1698 if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */ 1699 ierr = PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1700 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1701 } else if (mumps->id.INFOG(1) == -13) { 1702 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1703 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1704 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10) ) { 1705 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray \n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1706 F->factorerrortype = 
MAT_FACTOR_OUTMEMORY; 1707 } else { 1708 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1709 F->factorerrortype = MAT_FACTOR_OTHER; 1710 } 1711 } 1712 } 1713 if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d\n",mumps->id.INFOG(16)); 1714 1715 F->assembled = PETSC_TRUE; 1716 mumps->matstruc = SAME_NONZERO_PATTERN; 1717 if (F->schur) { /* reset Schur status to unfactored */ 1718 #if defined(PETSC_HAVE_CUDA) 1719 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 1720 #endif 1721 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1722 mumps->id.ICNTL(19) = 2; 1723 ierr = MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);CHKERRQ(ierr); 1724 } 1725 ierr = MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);CHKERRQ(ierr); 1726 } 1727 1728 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 1729 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 1730 1731 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 1732 if (mumps->petsc_size > 1) { 1733 PetscInt lsol_loc; 1734 PetscScalar *sol_loc; 1735 1736 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr); 1737 1738 /* distributed solution; Create x_seq=sol_loc for repeated use */ 1739 if (mumps->x_seq) { 1740 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 1741 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 1742 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 1743 } 1744 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1745 ierr = PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);CHKERRQ(ierr); 1746 mumps->id.lsol_loc = lsol_loc; 1747 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1748 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr); 1749 } 1750 ierr = 
PetscLogFlops(mumps->id.RINFO(2));CHKERRQ(ierr); 1751 PetscFunctionReturn(0); 1752 } 1753 1754 /* Sets MUMPS options from the options database */ 1755 PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A) 1756 { 1757 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1758 PetscErrorCode ierr; 1759 PetscMUMPSInt icntl=0; 1760 PetscInt info[80],i,ninfo=80; 1761 PetscBool flg=PETSC_FALSE; 1762 1763 PetscFunctionBegin; 1764 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 1765 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr); 1766 if (flg) mumps->id.ICNTL(1) = icntl; 1767 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr); 1768 if (flg) mumps->id.ICNTL(2) = icntl; 1769 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr); 1770 if (flg) mumps->id.ICNTL(3) = icntl; 1771 1772 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr); 1773 if (flg) mumps->id.ICNTL(4) = icntl; 1774 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 1775 1776 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr); 1777 if (flg) mumps->id.ICNTL(6) = icntl; 1778 1779 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 
3=Scotch, 4=PORD, 5=Metis","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr); 1780 if (flg) { 1781 if (icntl== 1 && mumps->petsc_size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n"); 1782 else mumps->id.ICNTL(7) = icntl; 1783 } 1784 1785 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr); 1786 /* ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL);CHKERRQ(ierr); handled by MatSolveTranspose_MUMPS() */ 1787 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr); 1788 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr); 1789 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr); 1790 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr); 1791 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr); 1792 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr); 1793 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 1794 ierr = MatDestroy(&F->schur);CHKERRQ(ierr); 1795 ierr = 
MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 1796 } 1797 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_20","ICNTL(20): the format (dense or sparse) of the right-hand sides","None",mumps->id.ICNTL(20),&mumps->id.ICNTL(20),NULL);CHKERRQ(ierr); -- sparse rhs is not supported in PETSc API */ 1798 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL);CHKERRQ(ierr); we only use distributed solution vector */ 1799 1800 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr); 1801 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr); 1802 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr); 1803 if (mumps->id.ICNTL(24)) { 1804 mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ 1805 } 1806 1807 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr); 1808 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr); 1809 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_27","ICNTL(27): controls the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr); 1810 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel 
analysis and ictnl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr); 1811 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr); 1812 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */ 1813 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr); 1814 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr); -- not supported by PETSc API */ 1815 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr); 1816 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr); 1817 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);CHKERRQ(ierr); 1818 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);CHKERRQ(ierr); 1819 1820 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr); 1821 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr); 1822 ierr = 
PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr); 1823 ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr); 1824 ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr); 1825 ierr = PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);CHKERRQ(ierr); 1826 1827 ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL);CHKERRQ(ierr); 1828 1829 ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr); 1830 if (ninfo) { 1831 if (ninfo > 80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %d must <= 80\n",ninfo); 1832 ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr); 1833 mumps->ninfo = ninfo; 1834 for (i=0; i<ninfo; i++) { 1835 if (info[i] < 0 || info[i]>80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %d must between 1 and 80\n",ninfo); 1836 else mumps->info[i] = info[i]; 1837 } 1838 } 1839 1840 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1841 PetscFunctionReturn(0); 1842 } 1843 1844 PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps) 1845 { 1846 PetscErrorCode ierr; 1847 PetscInt nthreads=0; 1848 1849 PetscFunctionBegin; 1850 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1851 ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRQ(ierr); 1852 ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRQ(ierr); /* so that code like "if (!myid)" still works even if mumps_comm is different */ 1853 1854 ierr = PetscOptionsHasName(NULL,NULL,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr); 
1855 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1856 ierr = PetscOptionsGetInt(NULL,NULL,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr); 1857 if (mumps->use_petsc_omp_support) { 1858 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1859 ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr); 1860 ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr); 1861 #else 1862 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -mat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual\n"); 1863 #endif 1864 } else { 1865 mumps->omp_comm = PETSC_COMM_SELF; 1866 mumps->mumps_comm = mumps->petsc_comm; 1867 mumps->is_omp_master = PETSC_TRUE; 1868 } 1869 ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRQ(ierr); 1870 mumps->reqs = NULL; 1871 mumps->tag = 0; 1872 1873 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 1874 mumps->id.job = JOB_INIT; 1875 mumps->id.par = 1; /* host participates factorizaton and solve */ 1876 mumps->id.sym = mumps->sym; 1877 1878 PetscMUMPS_c(mumps); 1879 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in PetscInitializeMUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1880 1881 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 1882 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 
1883 */ 1884 ierr = MPI_Bcast(mumps->id.icntl,40,MPI_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 Manual Section 9 */ 1885 ierr = MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); 1886 1887 mumps->scat_rhs = NULL; 1888 mumps->scat_sol = NULL; 1889 1890 /* set PETSc-MUMPS default options - override MUMPS default */ 1891 mumps->id.ICNTL(3) = 0; 1892 mumps->id.ICNTL(4) = 0; 1893 if (mumps->petsc_size == 1) { 1894 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 1895 } else { 1896 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 1897 mumps->id.ICNTL(20) = 0; /* rhs is in dense format */ 1898 mumps->id.ICNTL(21) = 1; /* distributed solution */ 1899 } 1900 1901 /* schur */ 1902 mumps->id.size_schur = 0; 1903 mumps->id.listvar_schur = NULL; 1904 mumps->id.schur = NULL; 1905 mumps->sizeredrhs = 0; 1906 mumps->schur_sol = NULL; 1907 mumps->schur_sizesol = 0; 1908 PetscFunctionReturn(0); 1909 } 1910 1911 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps) 1912 { 1913 PetscErrorCode ierr; 1914 1915 PetscFunctionBegin; 1916 if (mumps->id.INFOG(1) < 0) { 1917 if (A->erroriffailure) { 1918 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1919 } else { 1920 if (mumps->id.INFOG(1) == -6) { 1921 ierr = PetscInfo2(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1922 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 1923 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 1924 ierr = PetscInfo2(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1925 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1926 } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) { 1927 ierr = PetscInfo(F,"Empty matrix\n");CHKERRQ(ierr); 1928 } else { 1929 ierr = 
PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1930 F->factorerrortype = MAT_FACTOR_OTHER; 1931 } 1932 } 1933 } 1934 PetscFunctionReturn(0); 1935 } 1936 1937 /* Note Petsc r(=c) permutation is used when mumps->id.ICNTL(7)==1 with centralized assembled matrix input; otherwise r and c are ignored */ 1938 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1939 { 1940 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1941 PetscErrorCode ierr; 1942 Vec b; 1943 const PetscInt M = A->rmap->N; 1944 1945 PetscFunctionBegin; 1946 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1947 1948 /* Set MUMPS options from the options database */ 1949 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1950 1951 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr); 1952 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1953 1954 /* analysis phase */ 1955 /*----------------*/ 1956 mumps->id.job = JOB_FACTSYMBOLIC; 1957 mumps->id.n = M; 1958 switch (mumps->id.ICNTL(18)) { 1959 case 0: /* centralized assembled matrix input */ 1960 if (!mumps->myid) { 1961 mumps->id.nnz = mumps->nnz; 1962 mumps->id.irn = mumps->irn; 1963 mumps->id.jcn = mumps->jcn; 1964 if (mumps->id.ICNTL(6)>1) mumps->id.a = (MumpsScalar*)mumps->val; 1965 if (mumps->id.ICNTL(7) == 1) { /* use user-provide matrix ordering - assuming r = c ordering */ 1966 /* 1967 PetscBool flag; 1968 ierr = ISEqual(r,c,&flag);CHKERRQ(ierr); 1969 if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"row_perm != col_perm"); 1970 ierr = ISView(r,PETSC_VIEWER_STDOUT_SELF); 1971 */ 1972 if (!mumps->myid) { 1973 const PetscInt *idx; 1974 PetscInt i; 1975 1976 ierr = PetscMalloc1(M,&mumps->id.perm_in);CHKERRQ(ierr); 1977 ierr = ISGetIndices(r,&idx);CHKERRQ(ierr); 1978 for (i=0; i<M; i++) {ierr = PetscMUMPSIntCast(idx[i]+1,&(mumps->id.perm_in[i]));CHKERRQ(ierr);} /* perm_in[]: 
start from 1, not 0! */ 1979 ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr); 1980 } 1981 } 1982 } 1983 break; 1984 case 3: /* distributed assembled matrix input (size>1) */ 1985 mumps->id.nnz_loc = mumps->nnz; 1986 mumps->id.irn_loc = mumps->irn; 1987 mumps->id.jcn_loc = mumps->jcn; 1988 if (mumps->id.ICNTL(6)>1) mumps->id.a_loc = (MumpsScalar*)mumps->val; 1989 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1990 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1991 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1992 ierr = VecDestroy(&b);CHKERRQ(ierr); 1993 break; 1994 } 1995 PetscMUMPS_c(mumps); 1996 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1997 1998 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1999 F->ops->solve = MatSolve_MUMPS; 2000 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2001 F->ops->matsolve = MatMatSolve_MUMPS; 2002 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2003 PetscFunctionReturn(0); 2004 } 2005 2006 /* Note the Petsc r and c permutations are ignored */ 2007 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 2008 { 2009 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 2010 PetscErrorCode ierr; 2011 Vec b; 2012 const PetscInt M = A->rmap->N; 2013 2014 PetscFunctionBegin; 2015 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 2016 2017 /* Set MUMPS options from the options database */ 2018 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 2019 2020 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr); 2021 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 2022 2023 /* analysis phase */ 2024 /*----------------*/ 2025 mumps->id.job = JOB_FACTSYMBOLIC; 2026 mumps->id.n = M; 2027 switch (mumps->id.ICNTL(18)) { 2028 case 0: /* centralized assembled matrix input */ 2029 if (!mumps->myid) { 2030 mumps->id.nnz = mumps->nnz; 2031 
mumps->id.irn = mumps->irn; 2032 mumps->id.jcn = mumps->jcn; 2033 if (mumps->id.ICNTL(6)>1) { 2034 mumps->id.a = (MumpsScalar*)mumps->val; 2035 } 2036 } 2037 break; 2038 case 3: /* distributed assembled matrix input (size>1) */ 2039 mumps->id.nnz_loc = mumps->nnz; 2040 mumps->id.irn_loc = mumps->irn; 2041 mumps->id.jcn_loc = mumps->jcn; 2042 if (mumps->id.ICNTL(6)>1) { 2043 mumps->id.a_loc = (MumpsScalar*)mumps->val; 2044 } 2045 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2046 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 2047 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 2048 ierr = VecDestroy(&b);CHKERRQ(ierr); 2049 break; 2050 } 2051 PetscMUMPS_c(mumps); 2052 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 2053 2054 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2055 F->ops->solve = MatSolve_MUMPS; 2056 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2057 PetscFunctionReturn(0); 2058 } 2059 2060 /* Note the Petsc r permutation and factor info are ignored */ 2061 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info) 2062 { 2063 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 2064 PetscErrorCode ierr; 2065 Vec b; 2066 const PetscInt M = A->rmap->N; 2067 2068 PetscFunctionBegin; 2069 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 2070 2071 /* Set MUMPS options from the options database */ 2072 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 2073 2074 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr); 2075 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 2076 2077 /* analysis phase */ 2078 /*----------------*/ 2079 mumps->id.job = JOB_FACTSYMBOLIC; 2080 mumps->id.n = M; 2081 switch (mumps->id.ICNTL(18)) { 2082 case 0: /* centralized assembled matrix input */ 2083 if (!mumps->myid) { 2084 mumps->id.nnz = mumps->nnz; 2085 mumps->id.irn = mumps->irn; 2086 
mumps->id.jcn = mumps->jcn; 2087 if (mumps->id.ICNTL(6)>1) { 2088 mumps->id.a = (MumpsScalar*)mumps->val; 2089 } 2090 } 2091 break; 2092 case 3: /* distributed assembled matrix input (size>1) */ 2093 mumps->id.nnz_loc = mumps->nnz; 2094 mumps->id.irn_loc = mumps->irn; 2095 mumps->id.jcn_loc = mumps->jcn; 2096 if (mumps->id.ICNTL(6)>1) { 2097 mumps->id.a_loc = (MumpsScalar*)mumps->val; 2098 } 2099 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2100 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 2101 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 2102 ierr = VecDestroy(&b);CHKERRQ(ierr); 2103 break; 2104 } 2105 PetscMUMPS_c(mumps); 2106 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 2107 2108 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2109 F->ops->solve = MatSolve_MUMPS; 2110 F->ops->solvetranspose = MatSolve_MUMPS; 2111 F->ops->matsolve = MatMatSolve_MUMPS; 2112 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2113 #if defined(PETSC_USE_COMPLEX) 2114 F->ops->getinertia = NULL; 2115 #else 2116 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2117 #endif 2118 PetscFunctionReturn(0); 2119 } 2120 2121 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) 2122 { 2123 PetscErrorCode ierr; 2124 PetscBool iascii; 2125 PetscViewerFormat format; 2126 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 2127 2128 PetscFunctionBegin; 2129 /* check if matrix is mumps type */ 2130 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0); 2131 2132 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 2133 if (iascii) { 2134 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 2135 if (format == PETSC_VIEWER_ASCII_INFO) { 2136 ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr); 2137 ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",mumps->id.sym);CHKERRQ(ierr); 2138 ierr = 
PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",mumps->id.par);CHKERRQ(ierr); 2139 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d \n",mumps->id.ICNTL(1));CHKERRQ(ierr); 2140 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d \n",mumps->id.ICNTL(2));CHKERRQ(ierr); 2141 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d \n",mumps->id.ICNTL(3));CHKERRQ(ierr); 2142 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",mumps->id.ICNTL(4));CHKERRQ(ierr); 2143 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",mumps->id.ICNTL(5));CHKERRQ(ierr); 2144 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",mumps->id.ICNTL(6));CHKERRQ(ierr); 2145 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d \n",mumps->id.ICNTL(7));CHKERRQ(ierr); 2146 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d \n",mumps->id.ICNTL(8));CHKERRQ(ierr); 2147 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",mumps->id.ICNTL(10));CHKERRQ(ierr); 2148 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",mumps->id.ICNTL(11));CHKERRQ(ierr); 2149 if (mumps->id.ICNTL(11)>0) { 2150 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr); 2151 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr); 2152 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6));CHKERRQ(ierr); 2153 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr); 2154 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr); 2155 ierr = PetscViewerASCIIPrintf(viewer," 
RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr); 2156 } 2157 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",mumps->id.ICNTL(12));CHKERRQ(ierr); 2158 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (sequential factorization of the root node): %d \n",mumps->id.ICNTL(13));CHKERRQ(ierr); 2159 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",mumps->id.ICNTL(14));CHKERRQ(ierr); 2160 /* ICNTL(15-17) not used */ 2161 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",mumps->id.ICNTL(18));CHKERRQ(ierr); 2162 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d \n",mumps->id.ICNTL(19));CHKERRQ(ierr); 2163 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(20) (RHS sparse pattern): %d \n",mumps->id.ICNTL(20));CHKERRQ(ierr); 2164 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d \n",mumps->id.ICNTL(21));CHKERRQ(ierr); 2165 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d \n",mumps->id.ICNTL(22));CHKERRQ(ierr); 2166 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory can be allocated locally):%d \n",mumps->id.ICNTL(23));CHKERRQ(ierr); 2167 2168 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d \n",mumps->id.ICNTL(24));CHKERRQ(ierr); 2169 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d \n",mumps->id.ICNTL(25));CHKERRQ(ierr); 2170 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for RHS or solution): %d \n",mumps->id.ICNTL(26));CHKERRQ(ierr); 2171 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(27) (blocking size for multiple RHS): %d \n",mumps->id.ICNTL(27));CHKERRQ(ierr); 2172 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d \n",mumps->id.ICNTL(28));CHKERRQ(ierr); 2173 ierr = 
PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d \n",mumps->id.ICNTL(29));CHKERRQ(ierr); 2174 2175 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d \n",mumps->id.ICNTL(30));CHKERRQ(ierr); 2176 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(31) (factors is discarded in the solve phase): %d \n",mumps->id.ICNTL(31));CHKERRQ(ierr); 2177 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d \n",mumps->id.ICNTL(33));CHKERRQ(ierr); 2178 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d \n",mumps->id.ICNTL(35));CHKERRQ(ierr); 2179 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(36) (choice of BLR factorization variant): %d \n",mumps->id.ICNTL(36));CHKERRQ(ierr); 2180 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(38) (estimated compression rate of LU factors): %d \n",mumps->id.ICNTL(38));CHKERRQ(ierr); 2181 2182 ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1));CHKERRQ(ierr); 2183 ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr); 2184 ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3));CHKERRQ(ierr); 2185 ierr = PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4));CHKERRQ(ierr); 2186 ierr = PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5));CHKERRQ(ierr); 2187 ierr = PetscViewerASCIIPrintf(viewer," CNTL(7) (dropping parameter for BLR): %g \n",mumps->id.CNTL(7));CHKERRQ(ierr); 2188 2189 /* infomation local to each processor */ 2190 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr); 2191 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 2192 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g 
\n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr); 2193 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2194 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr); 2195 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr); 2196 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2197 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr); 2198 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr); 2199 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2200 2201 ierr = PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr); 2202 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr); 2203 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2204 2205 ierr = PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr); 2206 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr); 2207 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2208 2209 ierr = PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr); 2210 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr); 2211 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2212 2213 if (mumps->ninfo && mumps->ninfo <= 80){ 2214 PetscInt i; 2215 for (i=0; i<mumps->ninfo; i++){ 2216 ierr = PetscViewerASCIIPrintf(viewer, " INFO(%d): \n",mumps->info[i]);CHKERRQ(ierr); 2217 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(mumps->info[i]));CHKERRQ(ierr); 
2218 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2219 } 2220 } 2221 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 2222 2223 if (!mumps->myid) { /* information from the host */ 2224 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr); 2225 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr); 2226 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr); 2227 ierr = PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr); 2228 2229 ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(3));CHKERRQ(ierr); 2230 ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(4));CHKERRQ(ierr); 2231 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",mumps->id.INFOG(5));CHKERRQ(ierr); 2232 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",mumps->id.INFOG(6));CHKERRQ(ierr); 2233 ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively use after analysis): %d \n",mumps->id.INFOG(7));CHKERRQ(ierr); 2234 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",mumps->id.INFOG(8));CHKERRQ(ierr); 2235 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",mumps->id.INFOG(9));CHKERRQ(ierr); 2236 ierr = 
PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d \n",mumps->id.INFOG(10));CHKERRQ(ierr); 2237 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d \n",mumps->id.INFOG(11));CHKERRQ(ierr); 2238 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",mumps->id.INFOG(12));CHKERRQ(ierr); 2239 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",mumps->id.INFOG(13));CHKERRQ(ierr); 2240 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",mumps->id.INFOG(14));CHKERRQ(ierr); 2241 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",mumps->id.INFOG(15));CHKERRQ(ierr); 2242 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",mumps->id.INFOG(16));CHKERRQ(ierr); 2243 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",mumps->id.INFOG(17));CHKERRQ(ierr); 2244 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",mumps->id.INFOG(18));CHKERRQ(ierr); 2245 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",mumps->id.INFOG(19));CHKERRQ(ierr); 2246 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",mumps->id.INFOG(20));CHKERRQ(ierr); 2247 ierr = PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory 
consuming processor): %d \n",mumps->id.INFOG(21));CHKERRQ(ierr); 2248 ierr = PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",mumps->id.INFOG(22));CHKERRQ(ierr); 2249 ierr = PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",mumps->id.INFOG(23));CHKERRQ(ierr); 2250 ierr = PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",mumps->id.INFOG(24));CHKERRQ(ierr); 2251 ierr = PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",mumps->id.INFOG(25));CHKERRQ(ierr); 2252 ierr = PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));CHKERRQ(ierr); 2253 ierr = PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));CHKERRQ(ierr); 2254 ierr = PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));CHKERRQ(ierr); 2255 ierr = PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));CHKERRQ(ierr); 2256 ierr = PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));CHKERRQ(ierr); 2257 ierr = PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));CHKERRQ(ierr); 2258 ierr = PetscViewerASCIIPrintf(viewer," INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));CHKERRQ(ierr); 2259 ierr = PetscViewerASCIIPrintf(viewer," INFOG(36) (after analysis: estimated size of all MUMPS internal data for 
running BLR in-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(36));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d \n",mumps->id.INFOG(37));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(38));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d \n",mumps->id.INFOG(39));CHKERRQ(ierr);
    }
  }
  }
  PetscFunctionReturn(0);
}

/* MatGetInfo() hook for a MUMPS factored matrix.
   Only the factor's entry count is available from MUMPS (INFOG(20), the estimated
   number of entries in the factors); the remaining MatInfo fields are not tracked
   by this interface and are reported as 0 (block_size is 1). */
PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)A->data;

  PetscFunctionBegin;
  info->block_size        = 1.0;
  info->nz_allocated      = mumps->id.INFOG(20);
  info->nz_used           = mumps->id.INFOG(20);
  info->nz_unneeded       = 0.0;
  info->assemblies        = 0.0;
  info->mallocs           = 0.0;
  info->memory            = 0.0;
  info->fill_ratio_given  = 0;
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------------------------*/
/* Select the index set `is` of variables that define the Schur complement to be
   returned by MUMPS.  Allocates F->schur (a SEQDENSE matrix whose array MUMPS
   fills directly) and translates the 0-based PETSc indices into the 1-based
   listvar_schur array that MUMPS expects.  In parallel, only the root rank may
   supply a non-empty IS (distributed Schur complements are not supported). */
PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
{
  Mat_MUMPS         *mumps =(Mat_MUMPS*)F->data;
  const PetscScalar *arr;
  const PetscInt    *idxs;
  PetscInt          size,i;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr);
  if (mumps->petsc_size > 1) {
    PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */

    ls   = mumps->myid ? (size ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
    ierr = MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);CHKERRQ(ierr);
    if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc\n");
  }

  /* Schur complement matrix */
  ierr = MatDestroy(&F->schur);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(F->schur,&arr);CHKERRQ(ierr);
  /* hand the dense array of F->schur to MUMPS; MUMPS writes the Schur complement into it */
  mumps->id.schur      = (MumpsScalar*)arr;
  mumps->id.size_schur = size;
  mumps->id.schur_lld  = size;
  ierr = MatDenseRestoreArrayRead(F->schur,&arr);CHKERRQ(ierr);
  if (mumps->sym == 1) {
    ierr = MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);
  }

  /* MUMPS expects Fortran style indices */
  ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&mumps->id.listvar_schur);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr);
  for (i=0; i<size; i++) {ierr = PetscMUMPSIntCast(idxs[i]+1,&(mumps->id.listvar_schur[i]));CHKERRQ(ierr);}
  ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr);
  if (mumps->petsc_size > 1) {
    mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */
  } else {
    if (F->factortype == MAT_FACTOR_LU) {
      mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
    } else {
      mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
    }
  }
  /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
  mumps->id.ICNTL(26) = -1;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------------------------*/
/* Copy the Schur complement computed by MUMPS (mumps->id.schur) into a newly
   created SEQDENSE matrix *S, converting from the storage layout selected by
   ICNTL(19) (by-rows, by-columns, or triangular) to full column-major storage.
   For symmetric factorizations the triangular data is mirrored into both halves. */
PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S)
{
  Mat            St;
  Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
  PetscScalar    *array;
#if defined(PETSC_USE_COMPLEX)
  PetscScalar    im = PetscSqrtScalar((PetscScalar)-1.0); /* imaginary unit, used to rebuild PetscScalar from MUMPS (r,i) pairs */
#endif
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
  ierr = MatCreate(PETSC_COMM_SELF,&St);CHKERRQ(ierr);
  ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);CHKERRQ(ierr);
  ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(St);CHKERRQ(ierr);
  ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr);
  if (!mumps->sym) { /* MUMPS always return a full matrix */
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
      PetscInt i,j,N=mumps->id.size_schur;
      for (i=0;i<N;i++) {
        for (j=0;j<N;j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i*N+j];
#else
          PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
#endif
          array[j*N+i] = val; /* transpose row-major MUMPS data into column-major dense storage */
        }
      }
    } else { /* stored by columns */
      ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr);
    }
  } else { /* either full or lower-triangular (not packed) */
    if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
      PetscInt i,j,N=mumps->id.size_schur;
      for (i=0;i<N;i++) {
        for (j=i;j<N;j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i*N+j];
#else
          PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
#endif
          array[i*N+j] = val; /* mirror the triangle into both halves of the dense matrix */
          array[j*N+i] = val;
        }
      }
    } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
      ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr);
    } else { /* ICNTL(19) == 1 lower triangular stored by rows */
      PetscInt i,j,N=mumps->id.size_schur;
      for (i=0;i<N;i++) {
        for (j=0;j<i+1;j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i*N+j];
#else
          PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
#endif
          array[i*N+j] = val;
          array[j*N+i] = val;
        }
      }
    }
  }
  ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr);
  *S = St; /* ownership of St transfers to the caller */
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------------------------*/
/* Plugin implementation behind MatMumpsSetIcntl(): store ival into ICNTL(icntl).
   NOTE(review): icntl is not range-checked here; callers are expected to pass a
   valid 1-based MUMPS ICNTL index -- confirm whether a bounds check is wanted. */
PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival)
{
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  ierr = PetscMUMPSIntCast(ival,&mumps->id.ICNTL(icntl));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Plugin implementation behind MatMumpsGetIcntl(): read ICNTL(icntl) (1-based, unchecked) */
PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  *ival = mumps->id.ICNTL(icntl);
  PetscFunctionReturn(0);
}

/*@
   MatMumpsSetIcntl - Set MUMPS parameter ICNTL()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
.  icntl - index of MUMPS parameter array ICNTL()
-  ival - value of MUMPS ICNTL(icntl)

  Options Database:
.   -mat_mumps_icntl_<icntl> <ival>

   Level: beginner

   References:
.
MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidLogicalCollectiveInt(F,icntl,2);
  PetscValidLogicalCollectiveInt(F,ival,3);
  /* TryMethod: silently a no-op if F is not a MUMPS factor */
  ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatMumpsGetIcntl - Get MUMPS parameter ICNTL()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  icntl - index of MUMPS parameter array ICNTL()

   Output Parameter:
.  ival - value of MUMPS ICNTL(icntl)

   Level: beginner

   References:
.  MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidLogicalCollectiveInt(F,icntl,2);
  PetscValidIntPointer(ival,3);
  ierr = PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------------------------*/
/* Plugin implementation behind MatMumpsSetCntl(): store val into CNTL(icntl) (1-based, unchecked) */
PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  mumps->id.CNTL(icntl) = val;
  PetscFunctionReturn(0);
}

/* Plugin implementation behind MatMumpsGetCntl(): read CNTL(icntl) (1-based, unchecked) */
PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  *val = mumps->id.CNTL(icntl);
  PetscFunctionReturn(0);
}

/*@
   MatMumpsSetCntl - Set MUMPS parameter CNTL()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
.  icntl - index of MUMPS parameter array CNTL()
-  val - value of MUMPS CNTL(icntl)

  Options Database:
.   -mat_mumps_cntl_<icntl> <val>

   Level: beginner

   References:
.  MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidLogicalCollectiveInt(F,icntl,2);
  PetscValidLogicalCollectiveReal(F,val,3);
  ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatMumpsGetCntl - Get MUMPS parameter CNTL()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  icntl - index of MUMPS parameter array CNTL()

   Output Parameter:
.  val - value of MUMPS CNTL(icntl)

   Level: beginner

   References:
.  MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidLogicalCollectiveInt(F,icntl,2);
  PetscValidRealPointer(val,3);
  ierr = PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Plugin implementations behind MatMumpsGetInfo/Infog/Rinfo/Rinfog: read the
   per-process (INFO/RINFO) or global (INFOG/RINFOG) MUMPS statistics arrays
   at the given 1-based index (unchecked). */
PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  *info = mumps->id.INFO(icntl);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  *infog = mumps->id.INFOG(icntl);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  *rinfo = mumps->id.RINFO(icntl);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  *rinfog = mumps->id.RINFOG(icntl);
  PetscFunctionReturn(0);
}

/* Compute the entries of inv(A) selected by the sparsity pattern of spRHS
   (a MATTRANSPOSEMAT wrapper around a sparse matrix) using MUMPS' ICNTL(30)
   selected-inversion feature; results overwrite the values of the wrapped matrix. */
PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS)
{
  PetscErrorCode ierr;
  Mat            Bt = NULL,Btseq = NULL;
  PetscBool      flg;
  Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
  PetscScalar    *aa;
  PetscInt       spnr,*ia,*ja,M,nrhs;

  PetscFunctionBegin;
  /* NOTE(review): spRHS is a Mat, not an integer pointer; PetscValidIntPointer
     looks like the wrong validation macro here -- confirm (PetscValidHeaderSpecific
     would be the expected check). */
  PetscValidIntPointer(spRHS,2);
  ierr = PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatTransposeGetMat(spRHS,&Bt);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be type MATTRANSPOSEMAT matrix");

  ierr = MatMumpsSetIcntl(F,30,1);CHKERRQ(ierr); /* ICNTL(30): compute user-specified entries of inv(A) */

  if (mumps->petsc_size > 1) {
    Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data;
    Btseq = b->A; /* use the diagonal (local sequential) block of the MPIAIJ matrix */
  } else {
    Btseq = Bt;
  }

  ierr = MatGetSize(spRHS,&M,&nrhs);CHKERRQ(ierr);
  mumps->id.nrhs = nrhs;
  mumps->id.lrhs = M;
  mumps->id.rhs  = NULL;

  if (!mumps->myid) {
    /* only rank 0 supplies the sparse right-hand side; other ranks pass NULL/0 */
    ierr = MatSeqAIJGetArray(Btseq,&aa);CHKERRQ(ierr);
    ierr = MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
    ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr);
    mumps->id.rhs_sparse  = (MumpsScalar*)aa;
  } else {
    mumps->id.irhs_ptr    = NULL;
    mumps->id.irhs_sparse = NULL;
    mumps->id.nz_rhs      = 0;
    mumps->id.rhs_sparse  = NULL;
  }
  mumps->id.ICNTL(20) = 1; /* rhs is sparse */
  mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */

  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0)
    SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));

  if (!mumps->myid) {
    ierr = MatSeqAIJRestoreArray(Btseq,&aa);CHKERRQ(ierr);
    ierr = MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
  }
  PetscFunctionReturn(0);
}

/*@
   MatMumpsGetInverse - Get user-specified set of entries in inverse of A

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0]

   Output Parameter:
. spRHS - requested entries of inverse of A

   Level: beginner

   References:
.  MUMPS Users' Guide

.seealso: MatGetFactor(), MatCreateTranspose()
@*/
PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Implemented by wrapping spRHST in a temporary MATTRANSPOSEMAT and reusing
   MatMumpsGetInverse_MUMPS(); the wrapper is destroyed before returning. */
PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST)
{
  PetscErrorCode ierr;
  Mat            spRHS;

  PetscFunctionBegin;
  ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr);
  ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr);
  ierr = MatDestroy(&spRHS);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0]

   Output Parameter:
. spRHST - requested entries of inverse of A^T

   Level: beginner

   References:
.
MUMPS Users' Guide

.seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse()
@*/
PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST)
{
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  /* spRHST must be AIJ (sequential or parallel); the _MUMPS implementation wraps it in a MATTRANSPOSEMAT */
  ierr = PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix");

  ierr = PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatMumpsGetInfo - Get MUMPS parameter INFO()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  icntl - index of MUMPS parameter array INFO()

   Output Parameter:
.  ival - value of MUMPS INFO(icntl)

   Level: beginner

   References:
.  MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidIntPointer(ival,3);
  ierr = PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatMumpsGetInfog - Get MUMPS parameter INFOG()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  icntl - index of MUMPS parameter array INFOG()

   Output Parameter:
.  ival - value of MUMPS INFOG(icntl)

   Level: beginner

   References:
.  MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidIntPointer(ival,3);
  ierr = PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatMumpsGetRinfo - Get MUMPS parameter RINFO()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  icntl - index of MUMPS parameter array RINFO()

   Output Parameter:
.  val - value of MUMPS RINFO(icntl)

   Level: beginner

   References:
.  MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidRealPointer(val,3);
  ierr = PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatMumpsGetRinfog - Get MUMPS parameter RINFOG()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  icntl - index of MUMPS parameter array RINFOG()

   Output Parameter:
.  val - value of MUMPS RINFOG(icntl)

   Level: beginner

   References:
.  MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo()
@*/
PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidRealPointer(val,3);
  ierr = PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
  MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
  distributed and sequential matrices via the external package MUMPS.
2876 2877 Works with MATAIJ and MATSBAIJ matrices 2878 2879 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 2880 2881 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below. 2882 2883 Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver 2884 2885 Options Database Keys: 2886 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 2887 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 2888 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 2889 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 2890 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 2891 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis 2892 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 2893 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 2894 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 2895 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 2896 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 2897 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 2898 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 2899 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 2900 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can allocate per processor 2901 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 2902 . 
-mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis
.  -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix
.  -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering
.  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
.  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
.  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
.  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
.  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
.  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
.  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
.  -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold
.  -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement
.  -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
.  -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
.  -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
.  -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
-  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
                                    Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.

   Level: beginner

   Notes:
   MUMPS Cholesky does not handle (complex) Hermitian matrices http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf so using it will error if the matrix is Hermitian.
2925 2926 When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling 2927 $ KSPGetPC(ksp,&pc); 2928 $ PCFactorGetMatrix(pc,&mat); 2929 $ MatMumpsGetInfo(mat,....); 2930 $ MatMumpsGetInfog(mat,....); etc. 2931 Or you can run with -ksp_error_if_not_converged and the program will be stopped and the information printed in the error message. 2932 2933 Two modes to run MUMPS/PETSc with OpenMP 2934 2935 $ Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 2936 $ threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". 2937 2938 $ -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example, 2939 $ if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 2940 2941 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 2942 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc 2943 (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 2944 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 2945 (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided). 2946 2947 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 2948 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 2949 size m and create a communicator called omp_comm for each group. 
Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 2950 are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set 2951 by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs. 2952 In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets, 2953 if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind 2954 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half 2955 cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 2956 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 2957 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 2958 examine the mapping result. 2959 2960 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts, 2961 for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc 2962 calls omp_set_num_threads(m) internally before calling MUMPS. 2963 2964 References: 2965 + 1. - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 2966 - 2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." 
Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017. 2967 2968 .seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCGetFactor(), PCFactorGetMatrix() 2969 2970 M*/ 2971 2972 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type) 2973 { 2974 PetscFunctionBegin; 2975 *type = MATSOLVERMUMPS; 2976 PetscFunctionReturn(0); 2977 } 2978 2979 /* MatGetFactor for Seq and MPI AIJ matrices */ 2980 static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F) 2981 { 2982 Mat B; 2983 PetscErrorCode ierr; 2984 Mat_MUMPS *mumps; 2985 PetscBool isSeqAIJ; 2986 2987 PetscFunctionBegin; 2988 #if defined(PETSC_USE_COMPLEX) 2989 if (A->hermitian && !A->symmetric && ftype == MAT_FACTOR_CHOLESKY) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported"); 2990 #endif 2991 /* Create the factorization matrix */ 2992 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr); 2993 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2994 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2995 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2996 ierr = MatSetUp(B);CHKERRQ(ierr); 2997 2998 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2999 3000 B->ops->view = MatView_MUMPS; 3001 B->ops->getinfo = MatGetInfo_MUMPS; 3002 3003 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 3004 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 3005 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 3006 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 3007 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 3008 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 3009 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 3010 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 3011 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 3012 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 3013 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 3014 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 3015 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 3016 3017 if (ftype == MAT_FACTOR_LU) { 3018 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3019 B->factortype = MAT_FACTOR_LU; 3020 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3021 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 3022 mumps->sym = 0; 3023 } else { 3024 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3025 B->factortype = MAT_FACTOR_CHOLESKY; 3026 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 3027 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 3028 #if defined(PETSC_USE_COMPLEX) 3029 mumps->sym = 2; 3030 #else 3031 if (A->spd_set && A->spd) mumps->sym = 1; 3032 else mumps->sym = 2; 3033 #endif 3034 } 3035 3036 /* set solvertype */ 3037 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 3038 ierr = 
PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 3039 3040 B->ops->destroy = MatDestroy_MUMPS; 3041 B->data = (void*)mumps; 3042 3043 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 3044 3045 *F = B; 3046 PetscFunctionReturn(0); 3047 } 3048 3049 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3050 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F) 3051 { 3052 Mat B; 3053 PetscErrorCode ierr; 3054 Mat_MUMPS *mumps; 3055 PetscBool isSeqSBAIJ; 3056 3057 PetscFunctionBegin; 3058 #if defined(PETSC_USE_COMPLEX) 3059 if (A->hermitian && !A->symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported"); 3060 #endif 3061 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3062 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3063 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 3064 ierr = MatSetUp(B);CHKERRQ(ierr); 3065 3066 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 3067 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr); 3068 if (isSeqSBAIJ) { 3069 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3070 } else { 3071 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3072 } 3073 3074 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3075 B->ops->view = MatView_MUMPS; 3076 B->ops->getinfo = MatGetInfo_MUMPS; 3077 3078 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 3079 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 3080 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 3081 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 3082 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 3083 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 3084 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 3085 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 3086 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 3087 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 3088 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 3089 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 3090 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 3091 3092 B->factortype = MAT_FACTOR_CHOLESKY; 3093 #if defined(PETSC_USE_COMPLEX) 3094 mumps->sym = 2; 3095 #else 3096 if (A->spd_set && A->spd) mumps->sym = 1; 3097 else mumps->sym = 2; 3098 #endif 3099 3100 /* set solvertype */ 3101 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 3102 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 3103 3104 B->ops->destroy = MatDestroy_MUMPS; 3105 B->data = (void*)mumps; 3106 3107 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 3108 3109 *F = B; 3110 PetscFunctionReturn(0); 3111 } 3112 3113 static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F) 3114 { 3115 Mat B; 3116 PetscErrorCode ierr; 3117 Mat_MUMPS *mumps; 3118 PetscBool isSeqBAIJ; 3119 3120 PetscFunctionBegin; 3121 /* Create the factorization matrix */ 3122 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr); 3123 ierr = 
MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3124 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3125 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 3126 ierr = MatSetUp(B);CHKERRQ(ierr); 3127 3128 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 3129 if (ftype == MAT_FACTOR_LU) { 3130 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3131 B->factortype = MAT_FACTOR_LU; 3132 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3133 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3134 mumps->sym = 0; 3135 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead\n"); 3136 3137 B->ops->view = MatView_MUMPS; 3138 B->ops->getinfo = MatGetInfo_MUMPS; 3139 3140 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 3141 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 3142 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 3143 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 3144 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 3145 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 3146 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 3147 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 3148 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 3149 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 3150 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 3151 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 3152 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 3153 3154 /* set solvertype */ 3155 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 3156 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 3157 3158 B->ops->destroy = MatDestroy_MUMPS; 3159 B->data = (void*)mumps; 3160 3161 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 3162 3163 *F = B; 3164 PetscFunctionReturn(0); 3165 } 3166 3167 /* MatGetFactor for Seq and MPI SELL matrices */ 3168 static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F) 3169 { 3170 Mat B; 3171 PetscErrorCode ierr; 3172 Mat_MUMPS *mumps; 3173 PetscBool isSeqSELL; 3174 3175 PetscFunctionBegin; 3176 /* Create the factorization matrix */ 3177 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);CHKERRQ(ierr); 3178 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3179 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3180 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 3181 ierr = MatSetUp(B);CHKERRQ(ierr); 3182 3183 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 3184 3185 B->ops->view = MatView_MUMPS; 3186 B->ops->getinfo = MatGetInfo_MUMPS; 3187 3188 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 3189 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 3190 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 3191 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 3192 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 3193 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 3194 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 3195 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 3196 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 3197 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 3198 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 3199 3200 if (ftype == MAT_FACTOR_LU) { 3201 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3202 B->factortype = MAT_FACTOR_LU; 3203 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 3204 else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 3205 mumps->sym = 0; 3206 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 3207 3208 /* set solvertype */ 3209 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 3210 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 3211 3212 B->ops->destroy = MatDestroy_MUMPS; 3213 B->data = (void*)mumps; 3214 3215 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 3216 3217 *F = B; 3218 PetscFunctionReturn(0); 3219 } 3220 3221 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 3222 { 3223 PetscErrorCode ierr; 3224 3225 PetscFunctionBegin; 3226 ierr = 
MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 3227 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 3228 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 3229 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 3230 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 3231 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 3232 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 3233 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 3234 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 3235 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 3236 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);CHKERRQ(ierr); 3237 PetscFunctionReturn(0); 3238 } 3239 3240