/*
   Provides an interface to the MUMPS sparse solver
*/

#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sell/mpi/mpisell.h>

EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#include <cmumps_c.h>
#else
#include <zmumps_c.h>
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#include <smumps_c.h>
#else
#include <dmumps_c.h>
#endif
#endif
EXTERN_C_END
#define JOB_INIT         -1
#define JOB_FACTSYMBOLIC 1
#define JOB_FACTNUMERIC  2
#define JOB_SOLVE        3
#define JOB_END          -2

/* calls to MUMPS */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c cmumps_c
#else
#define MUMPS_c zmumps_c
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c smumps_c
#else
#define MUMPS_c dmumps_c
#endif
#endif

/* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
   numbers of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
   naming convention of PetscMPIInt, PetscBLASInt etc.
*/
typedef MUMPS_INT PetscMUMPSInt;

#if defined(INTSIZE64) /* INTSIZE64 is a macro used to build MUMPS in full 64-bit mode */
#error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
#else
#define MPIU_MUMPSINT       MPI_INT
#define PETSC_MUMPS_INT_MAX 2147483647
#define PETSC_MUMPS_INT_MIN -2147483648
#endif

/* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> holds row/col indices or other small integers */
PETSC_STATIC_INLINE PetscErrorCode PetscMUMPSIntCast(PetscInt a,PetscMUMPSInt *b)
{
  PetscFunctionBegin;
#if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_64BIT_INDICES)
  if (a > PETSC_MUMPS_INT_MAX || a < PETSC_MUMPS_INT_MIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
#endif
  *b = (PetscMUMPSInt)(a);
  PetscFunctionReturn(0);
}

/* Put these utility routines here since they are only used in this file */
PETSC_STATIC_INLINE PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject,const char opt[],const char text[],const char man[],PetscMUMPSInt currentvalue,PetscMUMPSInt *value,PetscBool *set,PetscMUMPSInt lb,PetscMUMPSInt ub)
{
  PetscErrorCode ierr;
  PetscInt       myval;
  PetscBool      myset;
  PetscFunctionBegin;
  /* PetscInt's size should always be >= PetscMUMPSInt's, so it is safe to call PetscOptionsInt_Private() to read a PetscMUMPSInt */
  ierr = PetscOptionsInt_Private(PetscOptionsObject,opt,text,man,(PetscInt)currentvalue,&myval,&myset,lb,ub);CHKERRQ(ierr);
  if (myset) {ierr = PetscMUMPSIntCast(myval,value);CHKERRQ(ierr);}
  if (set) *set = myset;
  PetscFunctionReturn(0);
}
#define PetscOptionsMUMPSInt(a,b,c,d,e,f) PetscOptionsMUMPSInt_Private(PetscOptionsObject,a,b,c,d,e,f,PETSC_MUMPS_INT_MIN,PETSC_MUMPS_INT_MAX)
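/* Illustrative use of the wrapper above (a minimal sketch mirroring the calls in
   PetscSetMUMPSFromOptions() later in this file; the option name here is just an example):

     PetscMUMPSInt icntl;
     PetscBool     flg;
     ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows","None",mumps->id.ICNTL(24),&icntl,&flg);CHKERRQ(ierr);
     if (flg) mumps->id.ICNTL(24) = icntl;
*/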
/* If using PETSc OpenMP support, we only call MUMPS on master ranks.
   Before/after the call, we change/restore the set of CPUs the master ranks can run on */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
#define PetscMUMPS_c(mumps) \
  do { \
    if (mumps->use_petsc_omp_support) { \
      if (mumps->is_omp_master) { \
        ierr = PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl);CHKERRQ(ierr); \
        MUMPS_c(&mumps->id); \
        ierr = PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl);CHKERRQ(ierr); \
      } \
      ierr = PetscOmpCtrlBarrier(mumps->omp_ctrl);CHKERRQ(ierr); \
      /* Global info is the same on all processes, so we Bcast it within omp_comm. Local info is \
         process-specific, so we only Bcast info[1], an error code, and leave the others (they do \
         not have an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \
         omp_comm is a small shared-memory communicator, hence doing multiple Bcasts as below is OK. \
      */ \
      ierr = MPI_Bcast(mumps->id.infog, 40,MPIU_MUMPSINT,0,mumps->omp_comm);CHKERRQ(ierr); \
      ierr = MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,    0,mumps->omp_comm);CHKERRQ(ierr); \
      ierr = MPI_Bcast(mumps->id.info,  1, MPIU_MUMPSINT,0,mumps->omp_comm);CHKERRQ(ierr); \
    } else { \
      MUMPS_c(&mumps->id); \
    } \
  } while(0)
#else
#define PetscMUMPS_c(mumps) \
  do { MUMPS_c(&mumps->id); } while (0)
#endif

/* declare MumpsScalar */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MumpsScalar mumps_complex
#else
#define MumpsScalar mumps_double_complex
#endif
#else
#define MumpsScalar PetscScalar
#endif

/* macros s.t. indices match MUMPS documentation */
#define ICNTL(I)  icntl[(I)-1]
#define CNTL(I)   cntl[(I)-1]
#define INFOG(I)  infog[(I)-1]
#define INFO(I)   info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I)  rinfo[(I)-1]
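/* For example, with the macros above the 1-based control ICNTL(7) from the MUMPS
   documentation is written as

     mumps->id.ICNTL(7) = 2;   <-- expands to mumps->id.icntl[6] = 2

   matching the Fortran-style numbering used in the MUMPS manual. */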
typedef struct Mat_MUMPS Mat_MUMPS;
struct Mat_MUMPS {
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;
#else
  ZMUMPS_STRUC_C id;
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
#else
  DMUMPS_STRUC_C id;
#endif
#endif

  MatStructure   matstruc;
  PetscMPIInt    myid,petsc_size;
  PetscMUMPSInt  *irn,*jcn;           /* the (i,j,v) triplets passed to mumps */
  PetscScalar    *val,*val_alloc;     /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer, hence val_alloc. */
  PetscInt64     nnz;                 /* number of nonzeros. The type is called selective 64-bit in mumps */
  PetscMUMPSInt  sym;
  MPI_Comm       mumps_comm;
  PetscMUMPSInt  ICNTL9_pre;          /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter     scat_rhs,scat_sol;   /* used by MatSolve() */
  Vec            b_seq,x_seq;
  PetscInt       ninfo,*info;         /* which INFO to display */
  PetscInt       sizeredrhs;
  PetscScalar    *schur_sol;
  PetscInt       schur_sizesol;
  PetscMUMPSInt  *ia_alloc,*ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
  PetscInt64     cur_ilen,cur_jlen;   /* current lengths of ia_alloc[], ja_alloc[] */
  PetscErrorCode (*ConvertToTriples)(Mat,PetscInt,MatReuse,Mat_MUMPS*);

  /* stuff used by petsc/mumps OpenMP support */
  PetscBool      use_petsc_omp_support;
  PetscOmpCtrl   omp_ctrl;            /* an OpenMP controller through which blocked processes release their CPUs (MPI_Barrier does not have this guarantee) */
  MPI_Comm       petsc_comm,omp_comm; /* petsc_comm is the petsc matrix's comm */
  PetscInt64     *recvcount;          /* a collection of nnz on omp_master */
  PetscMPIInt    tag,omp_comm_size;
  PetscBool      is_omp_master;       /* is this rank the master of omp_comm? */
  MPI_Request    *reqs;
};

/* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
   Here, nrow is the number of rows, ia[] is the row pointer and ja[] is the column indices.
*/
static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps,PetscInt nrow,PetscInt *ia,PetscInt *ja,PetscMUMPSInt **ia_mumps,PetscMUMPSInt **ja_mumps,PetscMUMPSInt *nnz_mumps)
{
  PetscErrorCode ierr;
  PetscInt       nnz=ia[nrow]-1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */

  PetscFunctionBegin;
#if defined(PETSC_USE_64BIT_INDICES)
  {
    PetscInt i;
    if (nrow+1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
      ierr = PetscFree(mumps->ia_alloc);CHKERRQ(ierr);
      ierr = PetscMalloc1(nrow+1,&mumps->ia_alloc);CHKERRQ(ierr);
      mumps->cur_ilen = nrow+1;
    }
    if (nnz > mumps->cur_jlen) {
      ierr = PetscFree(mumps->ja_alloc);CHKERRQ(ierr);
      ierr = PetscMalloc1(nnz,&mumps->ja_alloc);CHKERRQ(ierr);
      mumps->cur_jlen = nnz;
    }
    for (i=0; i<nrow+1; i++) {ierr = PetscMUMPSIntCast(ia[i],&(mumps->ia_alloc[i]));CHKERRQ(ierr);}
    for (i=0; i<nnz; i++)    {ierr = PetscMUMPSIntCast(ja[i],&(mumps->ja_alloc[i]));CHKERRQ(ierr);}
    *ia_mumps = mumps->ia_alloc;
    *ja_mumps = mumps->ja_alloc;
  }
#else
  *ia_mumps = ia;
  *ja_mumps = ja;
#endif
  ierr = PetscMUMPSIntCast(nnz,nnz_mumps);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
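/* For reference, the 1-based CSR layout expected here for the 2x2 identity is
     nrow = 2, ia = {1,2,3}, ja = {1,2}
   so nnz = ia[nrow]-1 = 2; PetscMUMPSIntCSRCast() only narrows the integer type
   (and copies when PETSc uses 64-bit indices), it does not change this layout. */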
static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr);
  ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
  ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
  mumps->id.size_schur = 0;
  mumps->id.schur_lld  = 0;
  mumps->id.ICNTL(19)  = 0;
  PetscFunctionReturn(0);
}

/* solve with rhs in mumps->id.redrhs and return the result in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS            *mumps=(Mat_MUMPS*)F->data;
  Mat                  S,B,X;
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;
  PetscErrorCode       ierr;

  PetscFunctionBegin;
  ierr = MatFactorFactorizeSchurComplement(F);CHKERRQ(ierr);
  ierr = MatFactorGetSchurComplement(F,&S,&schurstatus);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  ierr = MatBindToCPU(B,S->boundtocpu);CHKERRQ(ierr);
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);CHKERRQ(ierr);
    ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr);
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatMatSolveTranspose(S,B,X);CHKERRQ(ierr);
    } else {
      ierr = MatMatSolve(S,B,X);CHKERRQ(ierr);
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    sizesol = mumps->id.nrhs*mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
      ierr = PetscMalloc1(sizesol,&mumps->schur_sol);CHKERRQ(ierr);
      mumps->schur_sizesol = sizesol;
    }
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);CHKERRQ(ierr);
    ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr);
#endif
    ierr = MatProductCreateWithMat(S,B,NULL,X);CHKERRQ(ierr);
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatProductSetType(X,MATPRODUCT_AtB);CHKERRQ(ierr);
    } else {
      ierr = MatProductSetType(X,MATPRODUCT_AB);CHKERRQ(ierr);
    }
    ierr = MatProductSetFromOptions(X);CHKERRQ(ierr);
    ierr = MatProductSymbolic(X);CHKERRQ(ierr);
    ierr = MatProductNumeric(X);CHKERRQ(ierr);

    ierr = MatCopy(X,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %D",F->schur_status);
    break;
  }
  ierr = MatFactorRestoreSchurComplement(F,&S,schurstatus);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = MatDestroy(&X);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
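/* A caller-side sketch of the Schur complement API used above (assumes F is a
   MUMPS factor with Schur indices already attached via MatFactorSetSchurIS()):

     Mat                  S;
     MatFactorSchurStatus st;
     ierr = MatFactorGetSchurComplement(F,&S,&st);CHKERRQ(ierr);
     ...                          use the dense matrix S ...
     ierr = MatFactorRestoreSchurComplement(F,&S,st);CHKERRQ(ierr);
*/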
static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)F->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when the Schur complement has not been computed */
    PetscFunctionReturn(0);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = mumps->id.size_schur;
      ierr = PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);CHKERRQ(ierr);
      mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs;
    }
    mumps->id.ICNTL(26) = 1; /* condensation phase */
  } else { /* prepare for the expansion step */
    /* solve the Schur complement (this has to be done by the MUMPS user, so basically us) */
    ierr = MatMumpsSolveSchur_Private(F);CHKERRQ(ierr);
    mumps->id.ICNTL(26) = 2; /* expansion phase */
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free MUMPS internal redrhs array if we have solved for multiple rhs, in order to save memory */
    if (mumps->id.nrhs > 1) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = 0;
      mumps->sizeredrhs = 0;
    }
  }
  PetscFunctionReturn(0);
}

/*
  MatConvertToTriples_A_B - convert a PETSc matrix to triples: row[nz], col[nz], val[nz]

  input:
    A     - matrix in aij, baij or sbaij format
    shift - 0: C style output triples; 1: Fortran style output triples
    reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triples
            MAT_REUSE_MATRIX:   only the values in the v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and col indices, matrix values (matrix triples)

  The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() they are
  freed with PetscFree(mumps->irn). This is not ideal code; the fact that v is only sometimes part of mumps->irn means
  that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
*/

PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscScalar *av;
  const PetscInt    *ai,*aj,*ajj,M=A->rmap->n;
  PetscInt64        nz,rnz,i,j,k;
  PetscErrorCode    ierr;
  PetscMUMPSInt     *row,*col;
  Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;

  PetscFunctionBegin;
  ierr = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr);
  mumps->val = (PetscScalar*)av;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz;
    ai   = aa->i;
    aj   = aa->j;
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    for (i=k=0; i<M; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        ierr = PetscMUMPSIntCast(i+shift,&row[k]);CHKERRQ(ierr);
        ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[k]);CHKERRQ(ierr);
        k++;
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
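/* Example of the triples this produces: with shift = 1 (Fortran-style indices,
   as MUMPS expects) the sequential AIJ matrix
     [ 1 2 ]
     [ 0 3 ]
   yields irn = {1,1,2}, jcn = {1,2,2}, val = {1,2,3} and nnz = 3. */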
PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  PetscErrorCode ierr;
  PetscInt64     nz,i,j,k,r;
  Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
  PetscMUMPSInt  *row,*col;

  PetscFunctionBegin;
  mumps->val = a->val;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = a->sliidx[a->totalslices];
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    for (i=k=0; i<a->totalslices; i++) {
      for (j=a->sliidx[i],r=0; j<a->sliidx[i+1]; j++,r=((r+1)&0x07)) {
        ierr = PetscMUMPSIntCast(8*i+r+shift,&row[k++]);CHKERRQ(ierr);
      }
    }
    for (i=0; i<nz; i++) {ierr = PetscMUMPSIntCast(a->colidx[i]+shift,&col[i]);CHKERRQ(ierr);}
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  Mat_SeqBAIJ    *aa=(Mat_SeqBAIJ*)A->data;
  const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2;
  PetscInt64     M,nz,idx=0,rnz,i,j,k,m;
  PetscInt       bs;
  PetscErrorCode ierr;
  PetscMUMPSInt  *row,*col;

  PetscFunctionBegin;
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  M = A->rmap->N/bs;
  mumps->val = aa->a;
  if (reuse == MAT_INITIAL_MATRIX) {
    ai   = aa->i; aj = aa->j;
    nz   = bs2*aa->nz;
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    for (i=0; i<M; i++) {
      ajj = aj + ai[i];
      rnz = ai[i+1] - ai[i];
      for (k=0; k<rnz; k++) {
        for (j=0; j<bs; j++) {
          for (m=0; m<bs; m++) {
            ierr = PetscMUMPSIntCast(i*bs + m + shift,&row[idx]);CHKERRQ(ierr);
            ierr = PetscMUMPSIntCast(bs*ajj[k] + j + shift,&col[idx]);CHKERRQ(ierr);
            idx++;
          }
        }
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt *ai,*aj,*ajj;
  PetscInt       bs;
  PetscInt64     nz,rnz,i,j,k,m;
  PetscErrorCode ierr;
  PetscMUMPSInt  *row,*col;
  PetscScalar    *val;
  Mat_SeqSBAIJ   *aa=(Mat_SeqSBAIJ*)A->data;
  const PetscInt bs2=aa->bs2,mbs=aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool      hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
  if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  ai   = aa->i;
  aj   = aa->j;
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz;
    ierr = PetscMalloc2(bs2*nz,&row,bs2*nz,&col);CHKERRQ(ierr);
    if (bs>1) {
      ierr = PetscMalloc1(bs2*nz,&mumps->val_alloc);CHKERRQ(ierr);
      mumps->val = mumps->val_alloc;
    } else {
      mumps->val = aa->a;
    }
    mumps->irn = row;
    mumps->jcn = col;
  } else {
    if (bs == 1) mumps->val = aa->a;
    row = mumps->irn;
    col = mumps->jcn;
  }
  val = mumps->val;

  nz = 0;
  if (bs>1) {
    for (i=0; i<mbs; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            if (ajj[j]>i || k>=m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                ierr = PetscMUMPSIntCast(i*bs + m + shift,&row[nz]);CHKERRQ(ierr);
                ierr = PetscMUMPSIntCast(ajj[j]*bs + k + shift,&col[nz]);CHKERRQ(ierr);
              }
              val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs];
            }
          }
        }
      }
    }
  } else if (reuse == MAT_INITIAL_MATRIX) {
    for (i=0; i<mbs; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
        ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
        nz++;
      }
    }
    if (nz != aa->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %D != %D",nz,aa->nz);
  }
  if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
  PetscFunctionReturn(0);
}
PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai,*aj,*ajj,*adiag,M=A->rmap->n;
  PetscInt64        nz,rnz,i,j;
  const PetscScalar *av,*v1;
  PetscScalar       *val;
  PetscErrorCode    ierr;
  PetscMUMPSInt     *row,*col;
  Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
  PetscBool         missing;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
  if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  ierr  = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr);
  ai    = aa->i; aj = aa->j;
  adiag = aa->diag;
  ierr  = MatMissingDiagonal_SeqAIJ(A,&missing,NULL);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    /* count nz in the upper triangular part of A */
    nz = 0;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i]; j<ai[i+1]; j++) {
            if (aj[j] < i) continue;
            nz++;
          }
        } else {
          nz += ai[i+1] - adiag[i];
        }
      }
    } else {
      for (i=0; i<M; i++) nz += ai[i+1] - adiag[i];
    }
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;

    nz = 0;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i]; j<ai[i+1]; j++) {
            if (aj[j] < i) continue;
            ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
            ierr = PetscMUMPSIntCast(aj[j]+shift,&col[nz]);CHKERRQ(ierr);
            val[nz] = av[j];
            nz++;
          }
        } else {
          rnz = ai[i+1] - adiag[i];
          ajj = aj + adiag[i];
          v1  = av + adiag[i];
          for (j=0; j<rnz; j++) {
            ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
            ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i=0; i<M; i++) {
        rnz = ai[i+1] - adiag[i];
        ajj = aj + adiag[i];
        v1  = av + adiag[i];
        for (j=0; j<rnz; j++) {
          ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
          ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
          val[nz++] = v1[j];
        }
      }
    }
  } else {
    nz  = 0;
    val = mumps->val;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i]; j<ai[i+1]; j++) {
            if (aj[j] < i) continue;
            val[nz++] = av[j];
          }
        } else {
          rnz = ai[i+1] - adiag[i];
          v1  = av + adiag[i];
          for (j=0; j<rnz; j++) {
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i=0; i<M; i++) {
        rnz = ai[i+1] - adiag[i];
        v1  = av + adiag[i];
        for (j=0; j<rnz; j++) {
          val[nz++] = v1[j];
        }
      }
    }
  }
  ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
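/* In the MPI converters below, each process owns a block row of the matrix split
   into a diagonal ("A") part and an off-diagonal ("B") part; local column indices
   of the B part are translated to global ones through garray before shifting.
   For instance, with garray = {0,5,9}, local B column 1 becomes global column 5. */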
PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  PetscErrorCode    ierr;
  const PetscInt    *ai,*aj,*bi,*bj,*garray,*ajj,*bjj;
  PetscInt          bs;
  PetscInt64        rstart,nz,i,j,k,m,jj,irow,countA,countB;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av,*bv,*v1,*v2;
  PetscScalar       *val;
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ*)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
  const PetscInt    bs2=aa->bs2,mbs=aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
  if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  ierr   = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  rstart = A->rmap->rstart;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;
  av = aa->a;
  bv = bb->a;

  garray = mat->garray;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = (aa->nz+bb->nz)*bs2; /* just a conservative estimate */
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
    /* cannot determine the exact mumps->nnz yet because of the SBAIJ format */
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<mbs; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i]*bs2;
    v2     = bv + bi[i]*bs2;

    if (bs>1) {
      /* A-part */
      for (j=0; j<countA; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            if (rstart + ajj[j]*bs>irow || k>=m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                ierr = PetscMUMPSIntCast(irow + m + shift,&row[jj]);CHKERRQ(ierr);
                ierr = PetscMUMPSIntCast(rstart + ajj[j]*bs + k + shift,&col[jj]);CHKERRQ(ierr);
              }
              val[jj++] = v1[j*bs2 + m + k*bs];
            }
          }
        }
      }

      /* B-part */
      for (j=0; j<countB; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            if (reuse == MAT_INITIAL_MATRIX) {
              ierr = PetscMUMPSIntCast(irow + m + shift,&row[jj]);CHKERRQ(ierr);
              ierr = PetscMUMPSIntCast(garray[bjj[j]]*bs + k + shift,&col[jj]);CHKERRQ(ierr);
            }
            val[jj++] = v2[j*bs2 + m + k*bs];
          }
        }
      }
    } else {
      /* A-part */
      for (j=0; j<countA; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
          ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr);
        }
        val[jj++] = v1[j];
      }

      /* B-part */
      for (j=0; j<countB; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
          ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr);
        }
        val[jj++] = v2[j];
      }
    }
    irow += bs;
  }
  mumps->nnz = jj;
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai,*aj,*bi,*bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt64        rstart,nz,i,j,jj,irow,countA,countB;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av,*bv,*v1,*v2;
  PetscScalar       *val;
  Mat               Ad,Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;

  PetscFunctionBegin;
  ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr);

  aa = (Mat_SeqAIJ*)(Ad)->data;
  bb = (Mat_SeqAIJ*)(Ao)->data;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
        ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr);
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j=0; j<countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
        ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr);
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ*)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
  const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj,*bjj;
  const PetscInt    *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart;
  const PetscInt    bs2=mat->bs2;
  PetscErrorCode    ierr;
  PetscInt          bs;
  PetscInt64        nz,i,j,k,n,jj,irow,countA,countB,idx;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2;
  PetscScalar       *val;

  PetscFunctionBegin;
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = bs2*(aa->nz + bb->nz);
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<mbs; i++) {
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    ajj    = aj + ai[i];
    bjj    = bj + bi[i];
    v1     = av + bs2*ai[i];
    v2     = bv + bs2*bi[i];

    idx = 0;
    /* A-part */
    for (k=0; k<countA; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            ierr = PetscMUMPSIntCast(irow + n + shift,&row[jj]);CHKERRQ(ierr);
            ierr = PetscMUMPSIntCast(rstart + bs*ajj[k] + j + shift,&col[jj]);CHKERRQ(ierr);
          }
          val[jj++] = v1[idx++];
        }
      }
    }

    idx = 0;
    /* B-part */
    for (k=0; k<countB; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            ierr = PetscMUMPSIntCast(irow + n + shift,&row[jj]);CHKERRQ(ierr);
            ierr = PetscMUMPSIntCast(bs*garray[bjj[k]] + j + shift,&col[jj]);CHKERRQ(ierr);
          }
          val[jj++] = v2[idx++];
        }
      }
    }
    irow += bs;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai,*aj,*adiag,*bi,*bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt64        rstart,nz,nza,nzb,i,j,jj,irow,countA,countB;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av,*bv,*v1,*v2;
  PetscScalar       *val;
  Mat               Ad,Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
  if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr);

  aa    = (Mat_SeqAIJ*)(Ad)->data;
  bb    = (Mat_SeqAIJ*)(Ao)->data;
  ai    = aa->i;
  aj    = aa->j;
  adiag = aa->diag;
  bi    = bb->i;
  bj    = bb->j;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
    nzb = 0; /* num of upper triangular entries in mat->B */
    for (i=0; i<m; i++) {
      nza   += (ai[i+1] - adiag[i]);
      countB = bi[i+1] - bi[i];
      bjj    = bj + bi[i];
      for (j=0; j<countB; j++) {
        if (garray[bjj[j]] > rstart) nzb++;
      }
    }

    nz   = nza + nzb; /* total nz of the upper triangular part of mat */
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
    v1     = av + adiag[i];
    countA = ai[i+1] - adiag[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
        ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr);
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j=0; j<countB; j++) {
      if (garray[bjj[j]] > rstart) {
        if (reuse == MAT_INITIAL_MATRIX) {
          ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
          ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr);
        }
        val[jj++] = v2[j];
      }
    }
    irow++;
  }
  ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;

  PetscFunctionBegin;
  ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
  ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr);
  ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr);
  ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr);
  ierr = PetscFree2(mumps->irn,mumps->jcn);CHKERRQ(ierr);
  ierr = PetscFree(mumps->val_alloc);CHKERRQ(ierr);
  ierr = PetscFree(mumps->info);CHKERRQ(ierr);
  ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr);
  mumps->id.job = JOB_END;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1));
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  if (mumps->use_petsc_omp_support) { ierr = PetscOmpCtrlDestroy(&mumps->omp_ctrl);CHKERRQ(ierr); }
#endif
  ierr = PetscFree(mumps->ia_alloc);CHKERRQ(ierr);
  ierr = PetscFree(mumps->ja_alloc);CHKERRQ(ierr);
  ierr = PetscFree(mumps->recvcount);CHKERRQ(ierr);
  ierr = PetscFree(mumps->reqs);CHKERRQ(ierr);
  ierr = PetscFree(A->data);CHKERRQ(ierr);

  /* clear composed functions */
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS        *mumps=(Mat_MUMPS*)A->data;
  PetscScalar      *array;
  Vec              b_seq;
  IS               is_iden,is_petsc;
  PetscErrorCode   ierr;
  PetscInt         i;
  PetscBool        second_solve = PETSC_FALSE;
  static PetscBool cite1 = PETSC_FALSE,cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1);CHKERRQ(ierr);
  ierr = PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",&cite2);CHKERRQ(ierr);

  if (A->factorerrortype) {
    ierr = PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    ierr = VecSetInf(x);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  mumps->id.ICNTL(20) = 0; /* dense RHS */
  mumps->id.nrhs      = 1;
  b_seq               = mumps->b_seq;
  if (mumps->petsc_size > 1) {
    /* MUMPS only supports a centralized rhs. Scatter b into a sequential rhs vector */
    ierr = VecScatterBegin(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mumps->myid) {ierr = VecGetArray(b_seq,&array);CHKERRQ(ierr);}
  } else { /* petsc_size == 1 */
    ierr = VecCopy(b,x);CHKERRQ(ierr);
    ierr = VecGetArray(x,&array);CHKERRQ(ierr);
  }
  if (!mumps->myid) { /* define rhs on the host */
    mumps->id.nrhs = 1;
    mumps->id.rhs  = (MumpsScalar*)array;
  }

  /*
     handle condensation step of Schur complement (if any)
     We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
     According to the MUMPS (5.0.0) manual, any value should be harmful during the factorization phase.
     Unless the user provides a valid value for ICNTL(26), the MatSolve and MatMatSolve routines solve the full system.
     This requires an extra call to PetscMUMPS_c and the computation of the factors for S.
  */
  if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
    if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n");
    second_solve = PETSC_TRUE;
    ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
  }
  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

  /* handle expansion step of Schur complement (if any) */
  if (second_solve) {
    ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
  }

  if (mumps->petsc_size > 1) { /* convert the mumps distributed solution to a petsc mpi x */
    if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
      /* when id.ICNTL(9) changes, the contents of isol_loc may change (though not its size, lsol_loc), so recreate scat_sol */
      ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
    }
    if (!mumps->scat_sol) { /* create scatter scat_sol */
      PetscInt *isol2_loc=NULL;
      ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */
      ierr = PetscMalloc1(mumps->id.lsol_loc,&isol2_loc);CHKERRQ(ierr);
      for (i=0; i<mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i]-1; /* change Fortran style to C style */
      ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,isol2_loc,PETSC_OWN_POINTER,&is_petsc);CHKERRQ(ierr); /* to */
      ierr = VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr);
      ierr = ISDestroy(&is_iden);CHKERRQ(ierr);
      ierr = ISDestroy(&is_petsc);CHKERRQ(ierr);
      mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save the current value of id.ICNTL(9) */
    }

    ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }

  if (mumps->petsc_size > 1) {if (!mumps->myid) {ierr = VecRestoreArray(b_seq,&array);CHKERRQ(ierr);}}
  else {ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);}

  ierr = PetscLogFlops(2.0*mumps->id.RINFO(3));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mumps->id.ICNTL(9) = 0;
  ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr);
  mumps->id.ICNTL(9) = 1;
  PetscFunctionReturn(0);
}
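/* The convention above: MUMPS solves A x = b when ICNTL(9) is 1 (its default)
   and A^T x = b otherwise, so the transpose solve just toggles the control
   around an ordinary solve. A caller-side sketch (F holds a MUMPS factor):

     ierr = MatSolveTranspose(F,b,x);CHKERRQ(ierr);   <-- dispatches here
*/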
PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X)
{
  PetscErrorCode    ierr;
  Mat               Bt = NULL;
  PetscBool         denseX,denseB,flg,flgT;
  Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
  PetscInt          i,nrhs,M;
  PetscScalar       *array;
  const PetscScalar *rbray;
  PetscInt          lsol_loc,nlsol_loc,*idxx,iidx = 0;
  PetscMUMPSInt     *isol_loc,*isol_loc_save;
  PetscScalar       *bray,*sol_loc,*sol_loc_save;
  IS                is_to,is_from;
  PetscInt          k,proc,j,m,myrstart;
  const PetscInt    *rstart;
  Vec               v_mpi,b_seq,msol_loc;
  VecScatter        scat_rhs,scat_sol;
  PetscScalar       *aa;
  PetscInt          spnr,*ia,*ja;
  Mat_MPIAIJ        *b = NULL;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompareAny((PetscObject)X,&denseX,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (!denseX) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");

  ierr = PetscObjectTypeCompareAny((PetscObject)B,&denseB,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (denseB) {
    if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution");
    mumps->id.ICNTL(20) = 0; /* dense RHS */
  } else { /* sparse B */
    if (X == B) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_IDN,"X and B must be different matrices");
    ierr = PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);CHKERRQ(ierr);
    if (flgT) { /* input B is the transpose of the actual RHS matrix,
                   because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
      ierr = MatTransposeGetMat(B,&Bt);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix");
    mumps->id.ICNTL(20) = 1; /* sparse RHS */
  }

  ierr = MatGetSize(B,&M,&nrhs);CHKERRQ(ierr);
  mumps->id.nrhs = nrhs;
  mumps->id.lrhs = M;
  mumps->id.rhs  = NULL;

  if (mumps->petsc_size == 1) {
    PetscScalar *aa;
    PetscInt    spnr,*ia,*ja;
    PetscBool   second_solve = PETSC_FALSE;

    ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
    mumps->id.rhs = (MumpsScalar*)array;

    if (denseB) {
      /* copy B to X */
      ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr);
      ierr = PetscArraycpy(array,rbray,M*nrhs);CHKERRQ(ierr);
      ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr);
    } else { /* sparse B */
      ierr = MatSeqAIJGetArray(Bt,&aa);CHKERRQ(ierr);
      ierr = MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr);
      mumps->id.rhs_sparse = (MumpsScalar*)aa;
    }
    /* handle condensation step of Schur complement (if any) */
    if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
      second_solve = PETSC_TRUE;
      ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
    }
    /* solve phase */
    /*-------------*/
    mumps->id.job = JOB_SOLVE;
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

    /* handle expansion step of Schur complement (if any) */
    if (second_solve) {
      ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
    }
    if (!denseB) { /* sparse B */
      ierr = MatSeqAIJRestoreArray(Bt,&aa);CHKERRQ(ierr);
      ierr = MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
    ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  /*--------- parallel case: MUMPS requires the rhs B to be centralized on the host! --------*/
  if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n");

  /* create msol_loc to hold the mumps local solution */
  isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
  sol_loc_save  = (PetscScalar*)mumps->id.sol_loc;

  lsol_loc  = mumps->id.lsol_loc;
  nlsol_loc = nrhs*lsol_loc; /* length of sol_loc */
  ierr = PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);CHKERRQ(ierr);
  mumps->id.sol_loc  = (MumpsScalar*)sol_loc;
  mumps->id.isol_loc = isol_loc;

  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);CHKERRQ(ierr);

  if (denseB) { /* dense B */
    /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in
       very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank
       0, re-arrange B into the desired order, which is a local operation.
    */

    /* scatter v_mpi to b_seq because MUMPS only supports a centralized rhs */
    /* wrap the dense rhs matrix B into a vector v_mpi */
    ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr);
    ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr);
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr);

    /* scatter v_mpi to b_seq on proc[0]. MUMPS requires the rhs to be centralized on the host! */
    if (!mumps->myid) {
      PetscInt *idx;
      /* idx: maps the k-th index of v_mpi to the (i,j)-th global entry of B */
      ierr = PetscMalloc1(nrhs*M,&idx);CHKERRQ(ierr);
      ierr = MatGetOwnershipRanges(B,&rstart);CHKERRQ(ierr);
      k = 0;
      for (proc=0; proc<mumps->petsc_size; proc++){
        for (j=0; j<nrhs; j++){
          for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i;
        }
      }

      ierr = VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);CHKERRQ(ierr);
      ierr = ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);CHKERRQ(ierr);
    } else {
      ierr = VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);CHKERRQ(ierr);
    }
    ierr = VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);CHKERRQ(ierr);
    ierr = VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = ISDestroy(&is_to);CHKERRQ(ierr);
    ierr = ISDestroy(&is_from);CHKERRQ(ierr);
    ierr = VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

    if (!mumps->myid) { /* define rhs on the host */
      ierr = VecGetArray(b_seq,&bray);CHKERRQ(ierr);
      mumps->id.rhs = (MumpsScalar*)bray;
      ierr = VecRestoreArray(b_seq,&bray);CHKERRQ(ierr);
    }

  } else { /* sparse B */
    b = (Mat_MPIAIJ*)Bt->data;

    /* wrap dense X into a vector v_mpi */
    ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr);
    ierr = MatDenseGetArray(X,&bray);CHKERRQ(ierr);
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(X,&bray);CHKERRQ(ierr);

    if (!mumps->myid) {
      ierr = MatSeqAIJGetArray(b->A,&aa);CHKERRQ(ierr);
      ierr = MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr);
      mumps->id.rhs_sparse = (MumpsScalar*)aa;
    } else {
      mumps->id.irhs_ptr    = NULL;
      mumps->id.irhs_sparse = NULL;
      mumps->id.nz_rhs      = 0;
      mumps->id.rhs_sparse  = NULL;
    }
  }

  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

  /* scatter the mumps distributed solution to the petsc vector v_mpi, which shares local arrays with the solution matrix X */
  ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
  ierr = VecPlaceArray(v_mpi,array);CHKERRQ(ierr);

  /* create scatter scat_sol */
  ierr = MatGetOwnershipRanges(X,&rstart);CHKERRQ(ierr);
  /* iidx: index for scattering the mumps solution to petsc X */

  ierr = ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);CHKERRQ(ierr);
  ierr = PetscMalloc1(nlsol_loc,&idxx);CHKERRQ(ierr);
  for (i=0; i<lsol_loc; i++) {
    isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in the j-th vector */

    for (proc=0; proc<mumps->petsc_size; proc++){
      if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) {
        myrstart = rstart[proc];
        k        = isol_loc[i] - myrstart;        /* local index on the 1st column of petsc vector X */
        iidx     = k + myrstart*nrhs;             /* maps mumps isol_loc[i] to the petsc index in X */
        m        = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */
        break;
      }
    }

    for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m;
  }
  ierr = ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr);
  ierr = VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);CHKERRQ(ierr);
  ierr = VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = ISDestroy(&is_from);CHKERRQ(ierr);
  ierr = ISDestroy(&is_to);CHKERRQ(ierr);
  ierr = VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);

  /* free spaces */
  mumps->id.sol_loc  = (MumpsScalar*)sol_loc_save;
  mumps->id.isol_loc = isol_loc_save;

  ierr = PetscFree2(sol_loc,isol_loc);CHKERRQ(ierr);
  ierr = PetscFree(idxx);CHKERRQ(ierr);
  ierr = VecDestroy(&msol_loc);CHKERRQ(ierr);
  ierr = VecDestroy(&v_mpi);CHKERRQ(ierr);
  if (!denseB) {
    if (!mumps->myid) {
      b = (Mat_MPIAIJ*)Bt->data;
      ierr = MatSeqAIJRestoreArray(b->A,&aa);CHKERRQ(ierr);
      ierr = MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
  } else {
    ierr = VecDestroy(&b_seq);CHKERRQ(ierr);
    ierr = VecScatterDestroy(&scat_rhs);CHKERRQ(ierr);
  }
  ierr = VecScatterDestroy(&scat_sol);CHKERRQ(ierr);
  ierr = PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
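/* MUMPS takes sparse right-hand sides in compressed sparse COLUMN storage, so in
   MatMatSolve_MUMPS() above a sparse rhs must arrive as a MATTRANSPOSEMAT wrapper
   whose row (CSR) structure is read as the column structure of the actual rhs.
   The routine below builds that wrapper for callers holding the transpose Bt. */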
PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X)
{
  PetscErrorCode ierr;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix");

  /* Create B=Bt^T that uses Bt's data structure */
  ierr = MatCreateTranspose(Bt,&B);CHKERRQ(ierr);

  ierr = MatMatSolve_MUMPS(A,B,X);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if !defined(PETSC_USE_COMPLEX)
/*
  input:
   F:     numeric factor
  output:
   nneg:  total number of negative pivots
   nzero: total number of zero pivots
   npos:  (global dimension of F) - nneg - nzero
*/
PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)F->data;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRQ(ierr);
  /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
  if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",mumps->id.ICNTL(13));

  if (nneg) *nneg = mumps->id.INFOG(12);
  if (nzero || npos) {
    if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
    if (nzero) *nzero = mumps->id.INFOG(28);
    if (npos)  *npos  = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
  }
  PetscFunctionReturn(0);
}
#endif
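/* Illustrative use of the inertia computation (real matrices only; assumes a
   Cholesky factor F obtained with -mat_mumps_icntl_13 1, plus -mat_mumps_icntl_24 1
   if zero-pivot counts are wanted):

     PetscInt nneg,nzero,npos;
     ierr = MatGetInertia(F,&nneg,&nzero,&npos);CHKERRQ(ierr);
*/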
PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps)
{
  PetscErrorCode ierr;
  PetscInt       i,nreqs;
  PetscMUMPSInt  *irn,*jcn;
  PetscMPIInt    count;
  PetscInt64     totnnz,remain;
  const PetscInt osize=mumps->omp_comm_size;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (osize > 1) {
    if (reuse == MAT_INITIAL_MATRIX) {
      /* master first gathers counts of nonzeros to receive */
      if (mumps->is_omp_master) {ierr = PetscMalloc1(osize,&mumps->recvcount);CHKERRQ(ierr);}
      ierr = MPI_Gather(&mumps->nnz,1,MPIU_INT64,mumps->recvcount,1,MPIU_INT64,0/*master*/,mumps->omp_comm);CHKERRQ(ierr);

      /* Then each rank computes its number of sends/recvs */
      if (mumps->is_omp_master) {
        /* Start from 1 since self communication is not done in MPI */
        nreqs = 0;
        for (i=1; i<osize; i++) nreqs += (mumps->recvcount[i]+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
      } else {
        nreqs = (mumps->nnz+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
      }
      ierr = PetscMalloc1(nreqs*3,&mumps->reqs);CHKERRQ(ierr); /* Triple the requests since we send irn, jcn and val separately */

      /* The following code is doing a very simple thing: the omp_master rank gathers irn/jcn/val from the others.
         MPI_Gatherv would be enough if it supported big counts > 2^31-1. Since it does not, and mumps->nnz
         might be a prime number > 2^31-1, we have to slice the message. Note that omp_comm_size
         is very small, so the current approach should have no extra overhead compared to MPI_Gatherv.
      */
      nreqs = 0; /* counter for actual sends/recvs */
      if (mumps->is_omp_master) {
        for (i=0,totnnz=0; i<osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
        ierr = PetscMalloc2(totnnz,&irn,totnnz,&jcn);CHKERRQ(ierr);
        ierr = PetscMalloc1(totnnz,&val);CHKERRQ(ierr);

        /* Self communication */
        ierr = PetscArraycpy(irn,mumps->irn,mumps->nnz);CHKERRQ(ierr);
        ierr = PetscArraycpy(jcn,mumps->jcn,mumps->nnz);CHKERRQ(ierr);
        ierr = PetscArraycpy(val,mumps->val,mumps->nnz);CHKERRQ(ierr);

        /* Replace mumps->irn/jcn etc on the master with the newly allocated bigger arrays */
        ierr = PetscFree2(mumps->irn,mumps->jcn);CHKERRQ(ierr);
        ierr = PetscFree(mumps->val_alloc);CHKERRQ(ierr);
        mumps->nnz = totnnz;
        mumps->irn = irn;
        mumps->jcn = jcn;
        mumps->val = mumps->val_alloc = val;

        irn += mumps->recvcount[0]; /* recvcount[0] is the old mumps->nnz on omp rank 0 */
        jcn += mumps->recvcount[0];
        val += mumps->recvcount[0];

        /* Remote communication */
        for (i=1; i<osize; i++) {
          count  = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
          remain = mumps->recvcount[i] - count;
          while (count>0) {
            ierr = MPI_Irecv(irn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
            ierr = MPI_Irecv(jcn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
            ierr = MPI_Irecv(val,count,MPIU_SCALAR,  i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
            irn += count;
            jcn += count;
            val += count;
            count   = PetscMin(remain,PETSC_MPI_INT_MAX);
            remain -= count;
          }
        }
      } else {
        irn    = mumps->irn;
        jcn    = mumps->jcn;
        val    = mumps->val;
        count  = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
        remain = mumps->nnz - count;
        while (count>0) {
          ierr = MPI_Isend(irn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
          ierr = MPI_Isend(jcn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
          ierr = MPI_Isend(val,count,MPIU_SCALAR,  0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
          irn += count;
          jcn += count;
          val += count;
          count   = PetscMin(remain,PETSC_MPI_INT_MAX);
          remain -= count;
        }
      }
    } else {
      nreqs = 0;
      if (mumps->is_omp_master) {
        val = mumps->val + mumps->recvcount[0];
        for (i=1; i<osize; i++) { /* Remote communication only since self data is already in place */
          count  = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
          remain = mumps->recvcount[i] - count;
          while (count>0) {
            ierr = MPI_Irecv(val,count,MPIU_SCALAR,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
            val += count;
            count   = PetscMin(remain,PETSC_MPI_INT_MAX);
            remain -= count;
          }
        }
      } else {
        val    = mumps->val;
        count  = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
        remain = mumps->nnz - count;
        while (count>0) {
          ierr = MPI_Isend(val,count,MPIU_SCALAR,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
          val += count;
          count   = PetscMin(remain,PETSC_MPI_INT_MAX);
          remain -= count;
        }
      }
    }
    ierr = MPI_Waitall(nreqs,mumps->reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    mumps->tag++; /* It is fine for the above sends/recvs to share one MPI tag */
  }
  PetscFunctionReturn(0);
}
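/* The message-slicing pattern used above, in isolation (counts larger than
   PETSC_MPI_INT_MAX are sent in int-sized chunks because MPI counts are int):

     count  = PetscMin(n,PETSC_MPI_INT_MAX);
     remain = n - count;
     while (count > 0) {
       ...post MPI_Isend/MPI_Irecv with 'count' elements and advance the buffer...
       count   = PetscMin(remain,PETSC_MPI_INT_MAX);
       remain -= count;
     }
*/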
MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info) 1518 { 1519 Mat_MUMPS *mumps =(Mat_MUMPS*)(F)->data; 1520 PetscErrorCode ierr; 1521 PetscBool isMPIAIJ; 1522 1523 PetscFunctionBegin; 1524 if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) { /* INFOG(1)=-16 with INFO(2)=0 is the benign empty-matrix case */ 1525 if (mumps->id.INFOG(1) == -6) { 1526 ierr = PetscInfo2(A,"MatFactorNumeric is called with a singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1527 } 1528 ierr = PetscInfo2(A,"MatFactorNumeric is called after the analysis phase failed, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1529 PetscFunctionReturn(0); 1530 } 1531 1532 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps);CHKERRQ(ierr); 1533 ierr = MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);CHKERRQ(ierr); 1534 1535 /* numerical factorization phase */ 1536 /*-------------------------------*/ 1537 mumps->id.job = JOB_FACTNUMERIC; 1538 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1539 if (!mumps->myid) { 1540 mumps->id.a = (MumpsScalar*)mumps->val; 1541 } 1542 } else { 1543 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1544 } 1545 PetscMUMPS_c(mumps); 1546 if (mumps->id.INFOG(1) < 0) { 1547 if (A->erroriffailure) { 1548 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 1549 } else { 1550 if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */ 1551 ierr = PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1552 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1553 } else if (mumps->id.INFOG(1) == -13) { 1554 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1555 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1556 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 1557 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with work array\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1558 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1559 } else { 1560 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1561 F->factorerrortype = MAT_FACTOR_OTHER; 1562 } 1563 } 1564 } 1565 if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d\n",mumps->id.ICNTL(16)); 1566 1567 F->assembled = PETSC_TRUE; 1568 mumps->matstruc = SAME_NONZERO_PATTERN; 1569 if (F->schur) { /* reset Schur status to unfactored */ 1570 #if defined(PETSC_HAVE_CUDA) 1571 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 1572 #endif 1573 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1574 mumps->id.ICNTL(19) = 2; 1575 ierr = MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);CHKERRQ(ierr); 1576 } 1577 ierr = MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);CHKERRQ(ierr); 1578 } 1579 1580 /* just to be sure that the ICNTL(19) value returned by a call to MatMumpsGetIcntl is always consistent */ 1581 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 1582 1583 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 1584 if (mumps->petsc_size > 1) {
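    /* In parallel runs the solution is returned distributed (we set ICNTL(21)=1 in PetscInitializeMUMPS):
       MUMPS reports in INFO(23) how many solution entries live on this rank, and the buffer allocated
       below is wrapped in the sequential Vec x_seq so that the solve phase can scatter it (via scat_sol)
       back to the PETSc vector layout. */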
1585 PetscInt lsol_loc; 1586 PetscScalar *sol_loc; 1587 1588 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr); 1589 1590 /* distributed solution; Create x_seq=sol_loc for repeated use */ 1591 if (mumps->x_seq) { 1592 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 1593 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 1594 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 1595 } 1596 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1597 ierr = PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);CHKERRQ(ierr); 1598 mumps->id.lsol_loc = lsol_loc; 1599 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1600 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr); 1601 } 1602 ierr = PetscLogFlops(mumps->id.RINFO(2));CHKERRQ(ierr); 1603 PetscFunctionReturn(0); 1604 } 1605 1606 /* Sets MUMPS options from the options database */ 1607 PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A) 1608 { 1609 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1610 PetscErrorCode ierr; 1611 PetscMUMPSInt icntl=0; 1612 PetscInt info[80],i,ninfo=80; 1613 PetscBool flg=PETSC_FALSE; 1614 1615 PetscFunctionBegin; 1616 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 1617 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr); 1618 if (flg) mumps->id.ICNTL(1) = icntl; 1619 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warnings","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr); 1620 if (flg) mumps->id.ICNTL(2) = icntl; 1621 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr); 1622 if (flg) mumps->id.ICNTL(3) = icntl; 1623 1624 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr); 1625 if (flg) mumps->id.ICNTL(4) = icntl; 1626 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* restore the MUMPS default id.ICNTL(3) = 6 */ 1627 1628 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr); 1629 if (flg) mumps->id.ICNTL(6) = icntl; 1630 1631 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7).
3=Scotch, 4=PORD, 5=Metis","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr); 1632 if (flg) { 1633 if (icntl== 1 && mumps->petsc_size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"pivot order set by the user in PERM_IN is not supported in parallel by the PETSc/MUMPS interface\n"); 1634 else mumps->id.ICNTL(7) = icntl; 1635 } 1636 1637 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr); 1638 /* ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL);CHKERRQ(ierr); handled by MatSolveTranspose_MUMPS() */ 1639 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr); 1640 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr); 1641 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr); 1642 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr); 1643 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr); 1644 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr); 1645 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset Schur data (if any) */ 1646 ierr = MatDestroy(&F->schur);CHKERRQ(ierr); 1647 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 1648 } 1649 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_20","ICNTL(20): the format (dense or sparse) of the right-hand sides","None",mumps->id.ICNTL(20),&mumps->id.ICNTL(20),NULL);CHKERRQ(ierr); -- sparse rhs is not supported in PETSc API */ 1650 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL);CHKERRQ(ierr); we only use distributed solution vectors */ 1651 1652 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr); 1653 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr); 1654 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr); 1655 if (mumps->id.ICNTL(24)) { 1656 mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */ 1657 } 1658 1659 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr); 1660 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement
matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr); 1661 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_27","ICNTL(27): the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr); 1662 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr); 1663 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr); 1664 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */ 1665 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr); 1666 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr); -- not supported by PETSc API */ 1667 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr); 1668 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr); 1669 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);CHKERRQ(ierr); 1670 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);CHKERRQ(ierr); 1671 1672 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr); 1673 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr); 1674 ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr); 1675 ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr); 1676 ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr); 1677 ierr = PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);CHKERRQ(ierr); 1678 1679 ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, 256, NULL);CHKERRQ(ierr); 1680 1681 ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr); 1682 if (ninfo) { 1683 if (ninfo > 80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %d must <= 80\n",ninfo); 1684 ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr); 1685 mumps->ninfo = ninfo; 1686 for (i=0; i<ninfo; i++) { 1687 if 
(info[i] < 1 || info[i] > 80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %d must be between 1 and 80\n",info[i]); 1688 else mumps->info[i] = info[i]; 1689 } 1690 } 1691 1692 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1693 PetscFunctionReturn(0); 1694 } 1695 1696 PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps) 1697 { 1698 PetscErrorCode ierr; 1699 PetscInt nthreads=0; 1700 1701 PetscFunctionBegin; 1702 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1703 ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRQ(ierr); 1704 ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRQ(ierr); /* so that code like "if (!myid)" still works even if mumps_comm is different */ 1705 1706 ierr = PetscOptionsHasName(NULL,NULL,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr); 1707 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when the user does not supply one */ 1708 ierr = PetscOptionsGetInt(NULL,NULL,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr); 1709 if (mumps->use_petsc_omp_support) { 1710 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1711 ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr); 1712 ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr); 1713 #else 1714 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -mat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it; see more in the MATSOLVERMUMPS manual\n"); 1715 #endif 1716 } else { 1717 mumps->omp_comm = PETSC_COMM_SELF; 1718 mumps->mumps_comm = mumps->petsc_comm; 1719 mumps->is_omp_master = PETSC_TRUE; 1720 } 1721 ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRQ(ierr); 1722 mumps->reqs = NULL; 1723 mumps->tag = 0; 1724 1725 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 1726 mumps->id.job = JOB_INIT; 1727 mumps->id.par = 1; /* host participates in factorization and solve */ 1728 mumps->id.sym = mumps->sym; 1729 1730 PetscMUMPS_c(mumps); 1731 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in PetscInitializeMUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1732 1733 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 1734 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
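     (Rank 0 of omp_comm is the omp master that performed JOB_INIT above, which is why it is the
     broadcast root below.)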
1735 */ 1736 ierr = MPI_Bcast(mumps->id.icntl,40,MPI_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 Manual Section 9 */ 1737 ierr = MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); 1738 1739 mumps->scat_rhs = NULL; 1740 mumps->scat_sol = NULL; 1741 1742 /* set PETSc-MUMPS default options - override MUMPS defaults */ 1743 mumps->id.ICNTL(3) = 0; 1744 mumps->id.ICNTL(4) = 0; 1745 if (mumps->petsc_size == 1) { 1746 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 1747 } else { 1748 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 1749 mumps->id.ICNTL(20) = 0; /* rhs is in dense format */ 1750 mumps->id.ICNTL(21) = 1; /* distributed solution */ 1751 } 1752 1753 /* schur */ 1754 mumps->id.size_schur = 0; 1755 mumps->id.listvar_schur = NULL; 1756 mumps->id.schur = NULL; 1757 mumps->sizeredrhs = 0; 1758 mumps->schur_sol = NULL; 1759 mumps->schur_sizesol = 0; 1760 PetscFunctionReturn(0); 1761 } 1762 1763 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps) 1764 { 1765 PetscErrorCode ierr; 1766 1767 PetscFunctionBegin; 1768 if (mumps->id.INFOG(1) < 0) { 1769 if (A->erroriffailure) { 1770 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1771 } else { 1772 if (mumps->id.INFOG(1) == -6) { 1773 ierr = PetscInfo2(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1774 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 1775 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 1776 ierr = PetscInfo2(F,"workspace problem, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1777 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1778 } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) { 1779 ierr = PetscInfo(F,"Empty matrix\n");CHKERRQ(ierr); 1780 } else { 1781 ierr = PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1782 F->factorerrortype = MAT_FACTOR_OTHER; 1783 } 1784 } 1785 } 1786 PetscFunctionReturn(0); 1787 } 1788 1789 /* Note the Petsc r(=c) permutation is used when mumps->id.ICNTL(7)==1 with centralized assembled matrix input; otherwise r and c are ignored */ 1790 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1791 { 1792 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1793 PetscErrorCode ierr; 1794 Vec b; 1795 const PetscInt M = A->rmap->N; 1796 1797 PetscFunctionBegin; 1798 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1799 1800 /* Set MUMPS options from the options database */ 1801 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1802 1803 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr); 1804 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1805 1806 /* analysis phase */ 1807 /*----------------*/ 1808 mumps->id.job = JOB_FACTSYMBOLIC; 1809 mumps->id.n = M; 1810 switch (mumps->id.ICNTL(18)) { 1811 case 0: /* centralized assembled matrix input */ 1812 if (!mumps->myid) { 1813 mumps->id.nnz = mumps->nnz; 1814 mumps->id.irn = mumps->irn; 1815 mumps->id.jcn = mumps->jcn; 1816 if (mumps->id.ICNTL(6)>1) mumps->id.a = (MumpsScalar*)mumps->val; 1817 if (mumps->id.ICNTL(7) == 1) { /* use the user-provided matrix ordering - assuming r = c ordering */ 1818 /* 1819 PetscBool flag; 1820 ierr = ISEqual(r,c,&flag);CHKERRQ(ierr);
1821 if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"row_perm != col_perm"); 1822 ierr = ISView(r,PETSC_VIEWER_STDOUT_SELF); 1823 */ 1824 if (!mumps->myid) { 1825 const PetscInt *idx; 1826 PetscInt i; 1827 1828 ierr = PetscMalloc1(M,&mumps->id.perm_in);CHKERRQ(ierr); 1829 ierr = ISGetIndices(r,&idx);CHKERRQ(ierr); 1830 for (i=0; i<M; i++) {ierr = PetscMUMPSIntCast(idx[i]+1,&(mumps->id.perm_in[i]));CHKERRQ(ierr);} /* perm_in[]: start from 1, not 0! */ 1831 ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr); 1832 } 1833 } 1834 } 1835 break; 1836 case 3: /* distributed assembled matrix input (size>1) */ 1837 mumps->id.nnz_loc = mumps->nnz; 1838 mumps->id.irn_loc = mumps->irn; 1839 mumps->id.jcn_loc = mumps->jcn; 1840 if (mumps->id.ICNTL(6)>1) mumps->id.a_loc = (MumpsScalar*)mumps->val; 1841 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1842 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1843 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1844 ierr = VecDestroy(&b);CHKERRQ(ierr); 1845 break; 1846 } 1847 PetscMUMPS_c(mumps); 1848 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1849 1850 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1851 F->ops->solve = MatSolve_MUMPS; 1852 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1853 F->ops->matsolve = MatMatSolve_MUMPS; 1854 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1855 PetscFunctionReturn(0); 1856 } 1857 1858 /* Note the Petsc r and c permutations are ignored */ 1859 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1860 { 1861 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1862 PetscErrorCode ierr; 1863 Vec b; 1864 const PetscInt M = A->rmap->N; 1865 1866 PetscFunctionBegin; 1867 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1868 1869 /* Set MUMPS options from the options database */ 1870 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1871 1872 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr); 1873 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1874 1875 /* analysis phase */ 1876 /*----------------*/ 1877 mumps->id.job = JOB_FACTSYMBOLIC; 1878 mumps->id.n = M; 1879 switch (mumps->id.ICNTL(18)) { 1880 case 0: /* centralized assembled matrix input */ 1881 if (!mumps->myid) { 1882 mumps->id.nnz = mumps->nnz; 1883 mumps->id.irn = mumps->irn; 1884 mumps->id.jcn = mumps->jcn; 1885 if (mumps->id.ICNTL(6)>1) { 1886 mumps->id.a = (MumpsScalar*)mumps->val; 1887 } 1888 } 1889 break; 1890 case 3: /* distributed assembled matrix input (size>1) */ 1891 mumps->id.nnz_loc = mumps->nnz; 1892 mumps->id.irn_loc = mumps->irn; 1893 mumps->id.jcn_loc = mumps->jcn; 1894 if (mumps->id.ICNTL(6)>1) { 1895 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1896 } 1897 /* MUMPS only supports centralized rhs. 
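        In MatSolve_MUMPS the distributed rhs b is first gathered into the sequential b_seq on rank 0,
        roughly via VecScatterBegin(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD) and the
        matching VecScatterEnd() - a sketch of the intended use, not a verbatim quote of that routine.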
Create scatter scat_rhs for repeated use in MatSolve() */ 1898 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1899 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1900 ierr = VecDestroy(&b);CHKERRQ(ierr); 1901 break; 1902 } 1903 PetscMUMPS_c(mumps); 1904 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1905 1906 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1907 F->ops->solve = MatSolve_MUMPS; 1908 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1909 PetscFunctionReturn(0); 1910 } 1911 1912 /* Note the Petsc r permutation and factor info are ignored */ 1913 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info) 1914 { 1915 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1916 PetscErrorCode ierr; 1917 Vec b; 1918 const PetscInt M = A->rmap->N; 1919 1920 PetscFunctionBegin; 1921 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1922 1923 /* Set MUMPS options from the options database */ 1924 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1925 1926 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr); 1927 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1928 1929 /* analysis phase */ 1930 /*----------------*/ 1931 mumps->id.job = JOB_FACTSYMBOLIC; 1932 mumps->id.n = M; 1933 switch (mumps->id.ICNTL(18)) { 1934 case 0: /* centralized assembled matrix input */ 1935 if (!mumps->myid) { 1936 mumps->id.nnz = mumps->nnz; 1937 mumps->id.irn = mumps->irn; 1938 mumps->id.jcn = mumps->jcn; 1939 if (mumps->id.ICNTL(6)>1) { 1940 mumps->id.a = (MumpsScalar*)mumps->val; 1941 } 1942 } 1943 break; 1944 case 3: /* distributed assembled matrix input (size>1) */ 1945 mumps->id.nnz_loc = mumps->nnz; 1946 mumps->id.irn_loc = mumps->irn; 1947 mumps->id.jcn_loc = mumps->jcn; 1948 if (mumps->id.ICNTL(6)>1) { 1949 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1950 } 1951 /* MUMPS only supports centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 1952 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1953 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1954 ierr = VecDestroy(&b);CHKERRQ(ierr); 1955 break; 1956 } 1957 PetscMUMPS_c(mumps); 1958 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1959 1960 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 1961 F->ops->solve = MatSolve_MUMPS; 1962 F->ops->solvetranspose = MatSolve_MUMPS; 1963 F->ops->matsolve = MatMatSolve_MUMPS; 1964 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1965 #if defined(PETSC_USE_COMPLEX) 1966 F->ops->getinertia = NULL; 1967 #else 1968 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 1969 #endif 1970 PetscFunctionReturn(0); 1971 } 1972 1973 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) 1974 { 1975 PetscErrorCode ierr; 1976 PetscBool iascii; 1977 PetscViewerFormat format; 1978 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1979 1980 PetscFunctionBegin; 1981 /* check if matrix is mumps type */ 1982 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0); 1983 1984 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1985 if (iascii) { 1986 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1987 if (format == PETSC_VIEWER_ASCII_INFO) { 1988 ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr); 1989 ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",mumps->id.sym);CHKERRQ(ierr); 1990 ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",mumps->id.par);CHKERRQ(ierr); 1991 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d \n",mumps->id.ICNTL(1));CHKERRQ(ierr); 1992 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d \n",mumps->id.ICNTL(2));CHKERRQ(ierr); 1993 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d \n",mumps->id.ICNTL(3));CHKERRQ(ierr); 1994 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",mumps->id.ICNTL(4));CHKERRQ(ierr); 1995 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",mumps->id.ICNTL(5));CHKERRQ(ierr); 1996 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",mumps->id.ICNTL(6));CHKERRQ(ierr); 1997 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d \n",mumps->id.ICNTL(7));CHKERRQ(ierr); 1998 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d \n",mumps->id.ICNTL(8));CHKERRQ(ierr); 1999 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",mumps->id.ICNTL(10));CHKERRQ(ierr); 2000 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",mumps->id.ICNTL(11));CHKERRQ(ierr); 2001 if (mumps->id.ICNTL(11)>0) { 2002 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr); 2003 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr); 2004 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6));CHKERRQ(ierr); 2005 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr); 2006 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr); 2007 ierr = PetscViewerASCIIPrintf(viewer," 
RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr); 2008 } 2009 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",mumps->id.ICNTL(12));CHKERRQ(ierr); 2010 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (sequential factorization of the root node): %d \n",mumps->id.ICNTL(13));CHKERRQ(ierr); 2011 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",mumps->id.ICNTL(14));CHKERRQ(ierr); 2012 /* ICNTL(15-17) not used */ 2013 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",mumps->id.ICNTL(18));CHKERRQ(ierr); 2014 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d \n",mumps->id.ICNTL(19));CHKERRQ(ierr); 2015 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(20) (rhs sparse pattern): %d \n",mumps->id.ICNTL(20));CHKERRQ(ierr); 2016 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d \n",mumps->id.ICNTL(21));CHKERRQ(ierr); 2017 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d \n",mumps->id.ICNTL(22));CHKERRQ(ierr); 2018 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory that can be allocated locally): %d \n",mumps->id.ICNTL(23));CHKERRQ(ierr); 2019 2020 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d \n",mumps->id.ICNTL(24));CHKERRQ(ierr); 2021 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d \n",mumps->id.ICNTL(25));CHKERRQ(ierr); 2022 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for rhs or solution): %d \n",mumps->id.ICNTL(26));CHKERRQ(ierr); 2023 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(27) (experimental parameter): %d \n",mumps->id.ICNTL(27));CHKERRQ(ierr); 2024 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d \n",mumps->id.ICNTL(28));CHKERRQ(ierr); 2025 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d \n",mumps->id.ICNTL(29));CHKERRQ(ierr); 2026 2027 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d \n",mumps->id.ICNTL(30));CHKERRQ(ierr); 2028 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(31) (factors are discarded in the solve phase): %d \n",mumps->id.ICNTL(31));CHKERRQ(ierr); 2029 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d \n",mumps->id.ICNTL(33));CHKERRQ(ierr); 2030 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d \n",mumps->id.ICNTL(35));CHKERRQ(ierr); 2031 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(36) (choice of BLR factorization variant): %d \n",mumps->id.ICNTL(36));CHKERRQ(ierr); 2032 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(38) (estimated compression rate of LU factors): %d \n",mumps->id.ICNTL(38));CHKERRQ(ierr); 2033 2034 ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1));CHKERRQ(ierr); 2035 ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr); 2036 ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3));CHKERRQ(ierr); 2037 ierr = PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4));CHKERRQ(ierr); 2038 ierr = PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5));CHKERRQ(ierr); 2039 ierr =
PetscViewerASCIIPrintf(viewer," CNTL(7) (dropping parameter for BLR): %g \n",mumps->id.CNTL(7));CHKERRQ(ierr); 2040 2041 /* information local to each processor */ 2042 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr); 2043 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 2044 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr); 2045 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2046 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr); 2047 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr); 2048 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2049 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr); 2050 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr); 2051 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2052 2053 ierr = PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size (in MB) of MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr); 2054 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr); 2055 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2056 2057 ierr = PetscViewerASCIIPrintf(viewer, " INFO(16) (size (in MB) of MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr); 2058 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr); 2059 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2060 2061 ierr = PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr); 2062 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr); 2063 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2064 2065 if (mumps->ninfo && mumps->ninfo <= 80){ 2066 PetscInt i; 2067 for (i=0; i<mumps->ninfo; i++){ 2068 ierr = PetscViewerASCIIPrintf(viewer, " INFO(%d): \n",mumps->info[i]);CHKERRQ(ierr); 2069 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(mumps->info[i]));CHKERRQ(ierr); 2070 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2071 } 2072 } 2073 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 2074 2075 if (!mumps->myid) { /* information from the host */ 2076 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr); 2077 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr); 2078 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr); 2079 ierr = PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr); 2080 2081 ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(3));CHKERRQ(ierr); 2082 ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for
factors on all processors after analysis): %d \n",mumps->id.INFOG(4));CHKERRQ(ierr); 2083 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",mumps->id.INFOG(5));CHKERRQ(ierr); 2084 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",mumps->id.INFOG(6));CHKERRQ(ierr); 2085 ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively used after analysis): %d \n",mumps->id.INFOG(7));CHKERRQ(ierr); 2086 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",mumps->id.INFOG(8));CHKERRQ(ierr); 2087 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",mumps->id.INFOG(9));CHKERRQ(ierr); 2088 ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space to store the matrix factors after factorization): %d \n",mumps->id.INFOG(10));CHKERRQ(ierr); 2089 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d \n",mumps->id.INFOG(11));CHKERRQ(ierr); 2090 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",mumps->id.INFOG(12));CHKERRQ(ierr); 2091 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",mumps->id.INFOG(13));CHKERRQ(ierr); 2092 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compresses after factorization): %d \n",mumps->id.INFOG(14));CHKERRQ(ierr); 2093 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",mumps->id.INFOG(15));CHKERRQ(ierr); 2094 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",mumps->id.INFOG(16));CHKERRQ(ierr); 2095 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",mumps->id.INFOG(17));CHKERRQ(ierr); 2096 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",mumps->id.INFOG(18));CHKERRQ(ierr); 2097 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",mumps->id.INFOG(19));CHKERRQ(ierr); 2098 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",mumps->id.INFOG(20));CHKERRQ(ierr); 2099 ierr = PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",mumps->id.INFOG(21));CHKERRQ(ierr); 2100 ierr = PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",mumps->id.INFOG(22));CHKERRQ(ierr); 2101 ierr = PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",mumps->id.INFOG(23));CHKERRQ(ierr); 2102 ierr = PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",mumps->id.INFOG(24));CHKERRQ(ierr); 2103 ierr = PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d
\n",mumps->id.INFOG(25));CHKERRQ(ierr); 2104 ierr = PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));CHKERRQ(ierr); 2105 ierr = PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));CHKERRQ(ierr); 2106 ierr = PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));CHKERRQ(ierr); 2107 ierr = PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));CHKERRQ(ierr); 2108 ierr = PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));CHKERRQ(ierr); 2109 ierr = PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));CHKERRQ(ierr); 2110 ierr = PetscViewerASCIIPrintf(viewer," INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));CHKERRQ(ierr); 2111 ierr = PetscViewerASCIIPrintf(viewer," INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(36));CHKERRQ(ierr); 2112 ierr = PetscViewerASCIIPrintf(viewer," INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d \n",mumps->id.INFOG(37));CHKERRQ(ierr); 2113 ierr = PetscViewerASCIIPrintf(viewer," INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(38));CHKERRQ(ierr); 2114 ierr = PetscViewerASCIIPrintf(viewer," INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d \n",mumps->id.INFOG(39));CHKERRQ(ierr); 2115 } 2116 } 2117 } 2118 PetscFunctionReturn(0); 2119 } 2120 2121 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info) 2122 { 2123 Mat_MUMPS *mumps =(Mat_MUMPS*)A->data; 2124 2125 PetscFunctionBegin; 2126 info->block_size = 1.0; 2127 info->nz_allocated = mumps->id.INFOG(20); 2128 info->nz_used = mumps->id.INFOG(20); 2129 info->nz_unneeded = 0.0; 2130 info->assemblies = 0.0; 2131 info->mallocs = 0.0; 2132 info->memory = 0.0; 2133 info->fill_ratio_given = 0; 2134 info->fill_ratio_needed = 0; 2135 info->factor_mallocs = 0; 2136 PetscFunctionReturn(0); 2137 } 2138 2139 /* -------------------------------------------------------------------------------------------*/ 2140 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 2141 { 2142 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2143 const PetscScalar *arr; 2144 const PetscInt *idxs; 2145 PetscInt size,i; 2146 PetscErrorCode ierr; 2147 2148 PetscFunctionBegin; 2149 ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr); 2150 if (mumps->petsc_size > 1) { 2151 PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */ 2152 2153 ls = mumps->myid ? (size ? 
PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 2154 ierr = MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);CHKERRQ(ierr); 2155 if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported by PETSc\n"); 2156 } 2157 2158 /* Schur complement matrix */ 2159 ierr = MatDestroy(&F->schur);CHKERRQ(ierr); 2160 ierr = MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur);CHKERRQ(ierr); 2161 ierr = MatDenseGetArrayRead(F->schur,&arr);CHKERRQ(ierr); 2162 mumps->id.schur = (MumpsScalar*)arr; 2163 mumps->id.size_schur = size; 2164 mumps->id.schur_lld = size; 2165 ierr = MatDenseRestoreArrayRead(F->schur,&arr);CHKERRQ(ierr); 2166 if (mumps->sym == 1) { 2167 ierr = MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 2168 } 2169 2170 /* MUMPS expects Fortran style indices */ 2171 ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr); 2172 ierr = PetscMalloc1(size,&mumps->id.listvar_schur);CHKERRQ(ierr); 2173 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 2174 for (i=0; i<size; i++) {ierr = PetscMUMPSIntCast(idxs[i]+1,&(mumps->id.listvar_schur[i]));CHKERRQ(ierr);} 2175 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 2176 if (mumps->petsc_size > 1) { 2177 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2178 } else { 2179 if (F->factortype == MAT_FACTOR_LU) { 2180 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2181 } else { 2182 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2183 } 2184 } 2185 /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */ 2186 mumps->id.ICNTL(26) = -1; 2187 PetscFunctionReturn(0); 2188 } 2189 2190 /* -------------------------------------------------------------------------------------------*/ 2191 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S) 2192 { 2193 Mat St; 2194 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2195 PetscScalar *array; 2196 #if defined(PETSC_USE_COMPLEX) 2197 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 2198 #endif 2199 PetscErrorCode ierr; 2200 2201 PetscFunctionBegin; 2202 if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected!
You should call MatFactorSetSchurIS to enable it"); 2203 ierr = MatCreate(PETSC_COMM_SELF,&St);CHKERRQ(ierr); 2204 ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);CHKERRQ(ierr); 2205 ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr); 2206 ierr = MatSetUp(St);CHKERRQ(ierr); 2207 ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr); 2208 if (!mumps->sym) { /* MUMPS always returns a full matrix */ 2209 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2210 PetscInt i,j,N=mumps->id.size_schur; 2211 for (i=0;i<N;i++) { 2212 for (j=0;j<N;j++) { 2213 #if !defined(PETSC_USE_COMPLEX) 2214 PetscScalar val = mumps->id.schur[i*N+j]; 2215 #else 2216 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2217 #endif 2218 array[j*N+i] = val; 2219 } 2220 } 2221 } else { /* stored by columns */ 2222 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 2223 } 2224 } else { /* either full or lower-triangular (not packed) */ 2225 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2226 PetscInt i,j,N=mumps->id.size_schur; 2227 for (i=0;i<N;i++) { 2228 for (j=i;j<N;j++) { 2229 #if !defined(PETSC_USE_COMPLEX) 2230 PetscScalar val = mumps->id.schur[i*N+j]; 2231 #else 2232 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2233 #endif 2234 array[i*N+j] = val; 2235 array[j*N+i] = val; 2236 } 2237 } 2238 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2239 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 2240 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2241 PetscInt i,j,N=mumps->id.size_schur; 2242 for (i=0;i<N;i++) { 2243 for (j=0;j<i+1;j++) { 2244 #if !defined(PETSC_USE_COMPLEX) 2245 PetscScalar val = mumps->id.schur[i*N+j]; 2246 #else 2247 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2248 #endif 2249 array[i*N+j] = val; 2250 array[j*N+i] = val; 2251 } 2252 } 2253 } 2254 } 2255 ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr); 2256 *S = St; 2257 PetscFunctionReturn(0); 2258 } 2259 2260 /* -------------------------------------------------------------------------------------------*/ 2261 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival) 2262 { 2263 PetscErrorCode ierr; 2264 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2265 2266 PetscFunctionBegin; 2267 ierr = PetscMUMPSIntCast(ival,&mumps->id.ICNTL(icntl));CHKERRQ(ierr); 2268 PetscFunctionReturn(0); 2269 } 2270 2271 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival) 2272 { 2273 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2274 2275 PetscFunctionBegin; 2276 *ival = mumps->id.ICNTL(icntl); 2277 PetscFunctionReturn(0); 2278 } 2279 2280 /*@ 2281 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2282 2283 Logically Collective on Mat 2284 2285 Input Parameters: 2286 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2287 . icntl - index of MUMPS parameter array ICNTL() 2288 - ival - value of MUMPS ICNTL(icntl) 2289 2290 Options Database: 2291 . -mat_mumps_icntl_<icntl> <ival> 2292 2293 Level: beginner 2294 2295 References: 2296 .
MUMPS Users' Guide 2297 2298 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2299 @*/ 2300 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival) 2301 { 2302 PetscErrorCode ierr; 2303 2304 PetscFunctionBegin; 2305 PetscValidType(F,1); 2306 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2307 PetscValidLogicalCollectiveInt(F,icntl,2); 2308 PetscValidLogicalCollectiveInt(F,ival,3); 2309 ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr); 2310 PetscFunctionReturn(0); 2311 } 2312 2313 /*@ 2314 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2315 2316 Logically Collective on Mat 2317 2318 Input Parameters: 2319 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2320 - icntl - index of MUMPS parameter array ICNTL() 2321 2322 Output Parameter: 2323 . ival - value of MUMPS ICNTL(icntl) 2324 2325 Level: beginner 2326 2327 References: 2328 . MUMPS Users' Guide 2329 2330 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2331 @*/ 2332 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival) 2333 { 2334 PetscErrorCode ierr; 2335 2336 PetscFunctionBegin; 2337 PetscValidType(F,1); 2338 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2339 PetscValidLogicalCollectiveInt(F,icntl,2); 2340 PetscValidIntPointer(ival,3); 2341 ierr = PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2342 PetscFunctionReturn(0); 2343 } 2344 2345 /* -------------------------------------------------------------------------------------------*/ 2346 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val) 2347 { 2348 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2349 2350 PetscFunctionBegin; 2351 mumps->id.CNTL(icntl) = val; 2352 PetscFunctionReturn(0); 2353 } 2354 2355 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val) 2356 { 2357 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2358 2359 PetscFunctionBegin; 2360 *val = mumps->id.CNTL(icntl); 2361 PetscFunctionReturn(0); 2362 } 2363 2364 /*@ 2365 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2366 2367 Logically Collective on Mat 2368 2369 Input Parameters: 2370 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2371 . icntl - index of MUMPS parameter array CNTL() 2372 - val - value of MUMPS CNTL(icntl) 2373 2374 Options Database: 2375 . -mat_mumps_cntl_<icntl> <val> 2376 2377 Level: beginner 2378 2379 References: 2380 . 
MUMPS Users' Guide 2381 2382 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2383 @*/ 2384 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val) 2385 { 2386 PetscErrorCode ierr; 2387 2388 PetscFunctionBegin; 2389 PetscValidType(F,1); 2390 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2391 PetscValidLogicalCollectiveInt(F,icntl,2); 2392 PetscValidLogicalCollectiveReal(F,val,3); 2393 ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr); 2394 PetscFunctionReturn(0); 2395 } 2396 2397 /*@ 2398 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2399 2400 Logically Collective on Mat 2401 2402 Input Parameters: 2403 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2404 - icntl - index of MUMPS parameter array CNTL() 2405 2406 Output Parameter: 2407 . val - value of MUMPS CNTL(icntl) 2408 2409 Level: beginner 2410 2411 References: 2412 . MUMPS Users' Guide 2413 2414 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2415 @*/ 2416 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val) 2417 { 2418 PetscErrorCode ierr; 2419 2420 PetscFunctionBegin; 2421 PetscValidType(F,1); 2422 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2423 PetscValidLogicalCollectiveInt(F,icntl,2); 2424 PetscValidRealPointer(val,3); 2425 ierr = PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2426 PetscFunctionReturn(0); 2427 } 2428 2429 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info) 2430 { 2431 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2432 2433 PetscFunctionBegin; 2434 *info = mumps->id.INFO(icntl); 2435 PetscFunctionReturn(0); 2436 } 2437 2438 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog) 2439 { 2440 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2441 2442 PetscFunctionBegin; 2443 *infog = mumps->id.INFOG(icntl); 2444 PetscFunctionReturn(0); 2445 } 2446 2447 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo) 2448 { 2449 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2450 2451 PetscFunctionBegin; 2452 *rinfo = mumps->id.RINFO(icntl); 2453 PetscFunctionReturn(0); 2454 } 2455 2456 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog) 2457 { 2458 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2459 2460 PetscFunctionBegin; 2461 *rinfog = mumps->id.RINFOG(icntl); 2462 PetscFunctionReturn(0); 2463 } 2464 2465 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS) 2466 { 2467 PetscErrorCode ierr; 2468 Mat Bt = NULL,Btseq = NULL; 2469 PetscBool flg; 2470 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2471 PetscScalar *aa; 2472 PetscInt spnr,*ia,*ja,M,nrhs; 2473 2474 PetscFunctionBegin; 2475 PetscValidHeaderSpecific(spRHS,MAT_CLASSID,2); 2476 ierr = PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);CHKERRQ(ierr); 2477 if (flg) { 2478 ierr = MatTransposeGetMat(spRHS,&Bt);CHKERRQ(ierr); 2479 } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be of type MATTRANSPOSEMAT"); 2480 2481 ierr = MatMumpsSetIcntl(F,30,1);CHKERRQ(ierr); 2482 2483 if (mumps->petsc_size > 1) { 2484 Mat_MPIAIJ *b
= (Mat_MPIAIJ*)Bt->data; 2485 Btseq = b->A; 2486 } else { 2487 Btseq = Bt; 2488 } 2489 2490 ierr = MatGetSize(spRHS,&M,&nrhs);CHKERRQ(ierr); 2491 mumps->id.nrhs = nrhs; 2492 mumps->id.lrhs = M; 2493 mumps->id.rhs = NULL; 2494 2495 if (!mumps->myid) { 2496 ierr = MatSeqAIJGetArray(Btseq,&aa);CHKERRQ(ierr); 2497 ierr = MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2498 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2499 ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr); 2500 mumps->id.rhs_sparse = (MumpsScalar*)aa; 2501 } else { 2502 mumps->id.irhs_ptr = NULL; 2503 mumps->id.irhs_sparse = NULL; 2504 mumps->id.nz_rhs = 0; 2505 mumps->id.rhs_sparse = NULL; 2506 } 2507 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2508 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2509 2510 /* solve phase */ 2511 /*-------------*/ 2512 mumps->id.job = JOB_SOLVE; 2513 PetscMUMPS_c(mumps); 2514 if (mumps->id.INFOG(1) < 0) 2515 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 2516 2517 if (!mumps->myid) { 2518 ierr = MatSeqAIJRestoreArray(Btseq,&aa);CHKERRQ(ierr); 2519 ierr = MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2520 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2521 } 2522 PetscFunctionReturn(0); 2523 } 2524 2525 /*@ 2526 MatMumpsGetInverse - Get user-specified set of entries in inverse of A 2527 2528 Logically Collective on Mat 2529 2530 Input Parameters: 2531 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2532 - spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0] 2533 2534 Output Parameter: 2535 . spRHS - requested entries of inverse of A 2536 2537 Level: beginner 2538 2539 References: 2540 . MUMPS Users' Guide 2541 2542 .seealso: MatGetFactor(), MatCreateTranspose() 2543 @*/ 2544 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS) 2545 { 2546 PetscErrorCode ierr; 2547 2548 PetscFunctionBegin; 2549 PetscValidType(F,1); 2550 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2551 ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr); 2552 PetscFunctionReturn(0); 2553 } 2554 2555 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST) 2556 { 2557 PetscErrorCode ierr; 2558 Mat spRHS; 2559 2560 PetscFunctionBegin; 2561 ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr); 2562 ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr); 2563 ierr = MatDestroy(&spRHS);CHKERRQ(ierr); 2564 PetscFunctionReturn(0); 2565 } 2566 2567 /*@ 2568 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T 2569 2570 Logically Collective on Mat 2571 2572 Input Parameters: 2573 + F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface 2574 - spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0] 2575 2576 Output Parameter: 2577 . spRHST - requested entries of inverse of A^T 2578 2579 Level: beginner 2580 2581 References: 2582 . 
   References:
.  MUMPS Users' Guide

.seealso: MatGetFactor(), MatCreateTranspose()
@*/
PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST)
{
  PetscErrorCode ierr;
  Mat            spRHS;

  PetscFunctionBegin;
  ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr);
  ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr);
  ierr = MatDestroy(&spRHS);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatMumpsGetInverseTranspose - Get user-specified set of entries in the inverse of matrix A^T

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix of A obtained by calling MatGetFactor() from the PETSc-MUMPS interface
-  spRHST - sequential sparse matrix in MATAIJ format whose sparsity pattern on process 0 specifies the requested entries of inv(A^T)

   Output Parameter:
.  spRHST - requested entries of the inverse of A^T

   Level: beginner

   References:
.  MUMPS Users' Guide

.seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse()
@*/
PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST)
{
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  ierr = PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be a MATAIJ matrix");

  ierr = PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatMumpsGetInfo - Get MUMPS parameter INFO()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from the PETSc-MUMPS interface
-  icntl - index of the MUMPS parameter array INFO()

   Output Parameter:
.  ival - value of MUMPS INFO(icntl)

   Level: beginner

   References:
.  MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidIntPointer(ival,3);
  ierr = PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatMumpsGetInfog - Get MUMPS parameter INFOG()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from the PETSc-MUMPS interface
-  icntl - index of the MUMPS parameter array INFOG()

   Output Parameter:
.  ival - value of MUMPS INFOG(icntl)

   Level: beginner
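   Example usage:
   A sketch of querying the global MUMPS status after a factorization inside a KSP solve has
   failed; ksp is assumed to exist, and pc, F, infog1 are declared as PC, Mat, PetscInt:
$     KSPGetPC(ksp,&pc);
$     PCFactorGetMatrix(pc,&F);
$     MatMumpsGetInfog(F,1,&infog1);
   A negative INFOG(1) indicates an error; the MUMPS Users' Guide documents the individual codes.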
   References:
.  MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidIntPointer(ival,3);
  ierr = PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatMumpsGetRinfo - Get MUMPS parameter RINFO()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from the PETSc-MUMPS interface
-  icntl - index of the MUMPS parameter array RINFO()

   Output Parameter:
.  val - value of MUMPS RINFO(icntl)

   Level: beginner

   References:
.  MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidRealPointer(val,3);
  ierr = PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatMumpsGetRinfog - Get MUMPS parameter RINFOG()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from the PETSc-MUMPS interface
-  icntl - index of the MUMPS parameter array RINFOG()

   Output Parameter:
.  val - value of MUMPS RINFOG(icntl)

   Level: beginner

   References:
.  MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidRealPointer(val,3);
  ierr = PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
  MATSOLVERMUMPS - A solver type providing direct solvers (LU and Cholesky) for
  distributed and sequential matrices via the external package MUMPS.

  Works with MATAIJ and MATSBAIJ matrices; LU factorization is also available for MATBAIJ and for sequential MATSELL matrices.

  Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS

  Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and the non-MUMPS part of the code in flat-MPI mode. See details below.

  Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver; the same choice can be made from code, as sketched below.
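  For example (a sketch; ksp and pc are assumed to exist):
$    KSPGetPC(ksp,&pc);
$    PCSetType(pc,PCLU);
$    PCFactorSetMatSolverType(pc,MATSOLVERMUMPS);
$    KSPSetFromOptions(ksp);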
  Options Database Keys:
+  -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
.  -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warnings
.  -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host
.  -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4)
.  -mat_mumps_icntl_6 - ICNTL(6): permutes the matrix to a zero-free diagonal and/or scales it (0 to 7)
.  -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis
.  -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77)
.  -mat_mumps_icntl_10 - ICNTL(10): maximum number of iterative refinements
.  -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view)
.  -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
.  -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
.  -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space
.  -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement
.  -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
.  -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor
.  -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
.  -mat_mumps_icntl_25 - ICNTL(25): computes a solution of a deficient matrix and a null space basis
.  -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed
.  -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
.  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
.  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
.  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
.  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
.  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
.  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
.  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
.  -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold
.  -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement
.  -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
.  -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
.  -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
.  -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
-  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) were called before calling MUMPS.
                                    If m is omitted, the default is the number of cores per CPU package (socket) as reported by hwloc, following the suggestion of the MUMPS manual.
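  The ICNTL/CNTL entries above can also be set programmatically on the factored matrix, after
  MatGetFactor() and before the numeric factorization. A sketch with illustrative values (F is
  assumed to come from MatGetFactor() or PCFactorGetMatrix()):
$    MatMumpsSetIcntl(F,24,1);
$    MatMumpsSetIcntl(F,14,30);
$    MatMumpsSetCntl(F,3,1e-12);
  which turns on null-pivot detection (ICNTL(24)), allows a 30 percent increase in the estimated
  working space (ICNTL(14)), and sets the absolute pivoting threshold (CNTL(3)).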
   Level: beginner

   Notes:
   MUMPS Cholesky does not handle (complex) Hermitian matrices (see the MUMPS Users' Guide, http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf), so using it on a Hermitian matrix generates an error.

   When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling
$          KSPGetPC(ksp,&pc);
$          PCFactorGetMatrix(pc,&mat);
$          MatMumpsGetInfo(mat,....);
$          MatMumpsGetInfog(mat,....); etc.
    Alternatively, run with -ksp_error_if_not_converged: the program then stops with the MUMPS information printed in the error message.

   There are two modes of running MUMPS/PETSc with OpenMP:

$     Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
$     threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".

$     -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
$     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"

   To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
   (i.e., the PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc
   (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
   libraries, to get good performance you should also have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
   (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).

   If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain the MPI
   processes on each compute node. Listing the processes in rank ascending order, we split the processes on a node into consecutive groups of
   size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and the others in the omp_comm
   are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. We free the CPUs assigned to slaves (as set, for
   example, by CPU binding policies in job scripts) and make them available to the master so that the OpenMP threads spawned by MUMPS can run on them.
   On a multi-socket compute node, MPI rank mapping matters. Continuing the example above, suppose each compute node has two sockets.
   If you interleave MPI ranks across the two sockets, that is, even ranks are placed on socket 0 and odd ranks on socket 1, and you bind
   MPI ranks to cores, then with -mat_mumps_use_omp_threads 16 a master rank (and the threads it spawns) will use half of the cores on socket 0
   and half of the cores on socket 1, which hurts locality. If, on the other hand, you map MPI ranks consecutively onto the two sockets, the
   problem does not occur. Therefore, when you use -mat_mumps_use_omp_threads, keep an eye on your MPI rank mapping and CPU binding.
   For example, with the Slurm job scheduler one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
   examine the resulting mapping.

   PETSc does not control thread binding in MUMPS, so to get the best performance one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts,
   for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts, as PETSc
   calls omp_set_num_threads(m) internally before calling MUMPS.

   References:
+  1. - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
-  2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
.seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCFactorGetMatrix()

M*/

static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERMUMPS;
  PetscFunctionReturn(0);
}

/* MatGetFactor for Seq and MPI AIJ matrices */
static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqAIJ;

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  if (A->hermitian && !A->symmetric && ftype == MAT_FACTOR_CHOLESKY) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
#endif
  /* Create the factorization matrix */
  ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
    mumps->sym = 0;
  } else {
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
    B->factortype                  = MAT_FACTOR_CHOLESKY;
    if (isSeqAIJ) mumps->ConvertToTriples =
MatConvertToTriples_seqaij_seqsbaij; 2879 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 2880 #if defined(PETSC_USE_COMPLEX) 2881 mumps->sym = 2; 2882 #else 2883 if (A->spd_set && A->spd) mumps->sym = 1; 2884 else mumps->sym = 2; 2885 #endif 2886 } 2887 2888 /* set solvertype */ 2889 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2890 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2891 2892 B->ops->destroy = MatDestroy_MUMPS; 2893 B->data = (void*)mumps; 2894 2895 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2896 2897 *F = B; 2898 PetscFunctionReturn(0); 2899 } 2900 2901 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 2902 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F) 2903 { 2904 Mat B; 2905 PetscErrorCode ierr; 2906 Mat_MUMPS *mumps; 2907 PetscBool isSeqSBAIJ; 2908 2909 PetscFunctionBegin; 2910 #if defined(PETSC_USE_COMPLEX) 2911 if (A->hermitian && !A->symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported"); 2912 #endif 2913 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2914 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2915 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2916 ierr = MatSetUp(B);CHKERRQ(ierr); 2917 2918 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2919 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr); 2920 if (isSeqSBAIJ) { 2921 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 2922 } else { 2923 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 2924 } 2925 2926 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 2927 B->ops->view = MatView_MUMPS; 2928 B->ops->getinfo = MatGetInfo_MUMPS; 2929 2930 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2931 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2932 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2933 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2934 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2935 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2936 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2937 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2938 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2939 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2940 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2941 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2942 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2943 2944 B->factortype = MAT_FACTOR_CHOLESKY; 2945 #if defined(PETSC_USE_COMPLEX) 2946 mumps->sym = 2; 2947 #else 2948 if (A->spd_set && A->spd) mumps->sym = 1; 2949 else 
mumps->sym = 2; 2950 #endif 2951 2952 /* set solvertype */ 2953 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2954 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2955 2956 B->ops->destroy = MatDestroy_MUMPS; 2957 B->data = (void*)mumps; 2958 2959 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2960 2961 *F = B; 2962 PetscFunctionReturn(0); 2963 } 2964 2965 static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F) 2966 { 2967 Mat B; 2968 PetscErrorCode ierr; 2969 Mat_MUMPS *mumps; 2970 PetscBool isSeqBAIJ; 2971 2972 PetscFunctionBegin; 2973 /* Create the factorization matrix */ 2974 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr); 2975 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2976 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2977 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2978 ierr = MatSetUp(B);CHKERRQ(ierr); 2979 2980 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2981 if (ftype == MAT_FACTOR_LU) { 2982 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 2983 B->factortype = MAT_FACTOR_LU; 2984 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 2985 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 2986 mumps->sym = 0; 2987 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead\n"); 2988 2989 B->ops->view = MatView_MUMPS; 2990 B->ops->getinfo = MatGetInfo_MUMPS; 2991 2992 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2993 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2994 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2995 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2996 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2997 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2998 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2999 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 3000 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 3001 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 3002 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 3003 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 3004 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 3005 3006 /* set solvertype */ 3007 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 3008 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 3009 3010 B->ops->destroy = MatDestroy_MUMPS; 3011 B->data = (void*)mumps; 3012 3013 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 3014 3015 *F = B; 3016 PetscFunctionReturn(0); 3017 } 3018 3019 /* MatGetFactor for Seq and MPI SELL matrices */ 3020 
static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F) 3021 { 3022 Mat B; 3023 PetscErrorCode ierr; 3024 Mat_MUMPS *mumps; 3025 PetscBool isSeqSELL; 3026 3027 PetscFunctionBegin; 3028 /* Create the factorization matrix */ 3029 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);CHKERRQ(ierr); 3030 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3031 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3032 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 3033 ierr = MatSetUp(B);CHKERRQ(ierr); 3034 3035 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 3036 3037 B->ops->view = MatView_MUMPS; 3038 B->ops->getinfo = MatGetInfo_MUMPS; 3039 3040 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 3041 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 3042 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 3043 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 3044 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 3045 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 3046 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 3047 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 3048 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 3049 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 3050 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 3051 3052 if (ftype == MAT_FACTOR_LU) { 3053 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3054 B->factortype = MAT_FACTOR_LU; 3055 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 3056 else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 3057 mumps->sym = 0; 3058 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 3059 3060 /* set solvertype */ 3061 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 3062 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 3063 3064 B->ops->destroy = MatDestroy_MUMPS; 3065 B->data = (void*)mumps; 3066 3067 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 3068 3069 *F = B; 3070 PetscFunctionReturn(0); 3071 } 3072 3073 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 3074 { 3075 PetscErrorCode ierr; 3076 3077 PetscFunctionBegin; 3078 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 3079 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 3080 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 3081 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 3082 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 3083 ierr = 
MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 3084 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 3085 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 3086 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 3087 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 3088 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);CHKERRQ(ierr); 3089 PetscFunctionReturn(0); 3090 } 3091 3092
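/*
   Example (an illustrative sketch, not compiled as part of this file): once the registrations
   above have run, a MUMPS LU factorization can be obtained through the generic MatGetFactor()
   interface. A, b and x are assumed to be an assembled MATAIJ matrix and two conforming vectors;
   the NULL orderings rely on MUMPS computing its own ordering internally (if your PETSc version
   requires index sets here, pass identity orderings instead):

     Mat           F;
     MatFactorInfo info;

     MatFactorInfoInitialize(&info);
     MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);
     MatLUFactorSymbolic(F,A,NULL,NULL,&info);
     MatLUFactorNumeric(F,A,&info);
     MatSolve(F,b,x);
     MatDestroy(&F);
*/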