1 2 /* 3 Provides an interface to the MUMPS sparse solver 4 */ 5 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 8 #include <../src/mat/impls/sell/mpi/mpisell.h> 9 10 EXTERN_C_BEGIN 11 #if defined(PETSC_USE_COMPLEX) 12 #if defined(PETSC_USE_REAL_SINGLE) 13 #include <cmumps_c.h> 14 #else 15 #include <zmumps_c.h> 16 #endif 17 #else 18 #if defined(PETSC_USE_REAL_SINGLE) 19 #include <smumps_c.h> 20 #else 21 #include <dmumps_c.h> 22 #endif 23 #endif 24 EXTERN_C_END 25 #define JOB_INIT -1 26 #define JOB_FACTSYMBOLIC 1 27 #define JOB_FACTNUMERIC 2 28 #define JOB_SOLVE 3 29 #define JOB_END -2 30 31 /* calls to MUMPS */ 32 #if defined(PETSC_USE_COMPLEX) 33 #if defined(PETSC_USE_REAL_SINGLE) 34 #define MUMPS_c cmumps_c 35 #else 36 #define MUMPS_c zmumps_c 37 #endif 38 #else 39 #if defined(PETSC_USE_REAL_SINGLE) 40 #define MUMPS_c smumps_c 41 #else 42 #define MUMPS_c dmumps_c 43 #endif 44 #endif 45 46 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */ 47 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 48 #define PetscMUMPS_c(mumps) \ 49 do { \ 50 if (mumps->use_petsc_omp_support) { \ 51 if (mumps->is_omp_master) { \ 52 ierr = PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl);CHKERRQ(ierr); \ 53 MUMPS_c(&mumps->id); \ 54 ierr = PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl);CHKERRQ(ierr); \ 55 } \ 56 ierr = PetscOmpCtrlBarrier(mumps->omp_ctrl);CHKERRQ(ierr); \ 57 } else { \ 58 MUMPS_c(&mumps->id); \ 59 } \ 60 } while(0) 61 #else 62 #define PetscMUMPS_c(mumps) \ 63 do { MUMPS_c(&mumps->id); } while (0) 64 #endif 65 66 /* declare MumpsScalar */ 67 #if defined(PETSC_USE_COMPLEX) 68 #if defined(PETSC_USE_REAL_SINGLE) 69 #define MumpsScalar mumps_complex 70 #else 71 #define MumpsScalar mumps_double_complex 72 #endif 73 #else 74 #define MumpsScalar PetscScalar 75 #endif 76 77 /* macros s.t. indices match MUMPS documentation */ 78 #define ICNTL(I) icntl[(I)-1] 79 #define CNTL(I) cntl[(I)-1] 80 #define INFOG(I) infog[(I)-1] 81 #define INFO(I) info[(I)-1] 82 #define RINFOG(I) rinfog[(I)-1] 83 #define RINFO(I) rinfo[(I)-1] 84 85 typedef struct { 86 #if defined(PETSC_USE_COMPLEX) 87 #if defined(PETSC_USE_REAL_SINGLE) 88 CMUMPS_STRUC_C id; 89 #else 90 ZMUMPS_STRUC_C id; 91 #endif 92 #else 93 #if defined(PETSC_USE_REAL_SINGLE) 94 SMUMPS_STRUC_C id; 95 #else 96 DMUMPS_STRUC_C id; 97 #endif 98 #endif 99 100 MatStructure matstruc; 101 PetscMPIInt myid,petsc_size; 102 PetscInt *irn,*jcn,nz,sym; 103 PetscScalar *val; 104 MPI_Comm mumps_comm; 105 PetscInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 106 VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 107 Vec b_seq,x_seq; 108 PetscInt ninfo,*info; /* display INFO */ 109 PetscInt sizeredrhs; 110 PetscScalar *schur_sol; 111 PetscInt schur_sizesol; 112 113 PetscBool use_petsc_omp_support; 114 PetscOmpCtrl omp_ctrl; /* an OpenMP controler that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 115 MPI_Comm petsc_comm,omp_comm; /* petsc_comm is petsc matrix's comm */ 116 PetscMPIInt mpinz; /* on master rank, nz = sum(mpinz) over omp_comm; on other ranks, mpinz = nz*/ 117 PetscMPIInt omp_comm_size; 118 PetscBool is_omp_master; /* is this rank the master of omp_comm */ 119 PetscMPIInt *recvcount,*displs; 120 121 PetscErrorCode (*ConvertToTriples)(Mat, int, MatReuse, int*, int**, int**, PetscScalar**); 122 } Mat_MUMPS; 123 124 extern PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*); 125 126 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps) 127 { 128 PetscErrorCode ierr; 129 130 PetscFunctionBegin; 131 ierr = PetscFree2(mumps->id.listvar_schur,mumps->id.schur);CHKERRQ(ierr); 132 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 133 ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr); 134 mumps->id.size_schur = 0; 135 mumps->id.schur_lld = 0; 136 mumps->id.ICNTL(19) = 0; 137 PetscFunctionReturn(0); 138 } 139 140 /* solve with rhs in mumps->id.redrhs and return in the same location */ 141 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 142 { 143 Mat_MUMPS *mumps=(Mat_MUMPS*)F->data; 144 Mat S,B,X; 145 MatFactorSchurStatus schurstatus; 146 PetscInt sizesol; 147 PetscErrorCode ierr; 148 149 PetscFunctionBegin; 150 ierr = MatFactorFactorizeSchurComplement(F);CHKERRQ(ierr); 151 ierr = MatFactorGetSchurComplement(F,&S,&schurstatus);CHKERRQ(ierr); 152 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);CHKERRQ(ierr); 153 switch (schurstatus) { 154 case MAT_FACTOR_SCHUR_FACTORED: 155 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);CHKERRQ(ierr); 156 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 157 ierr = MatMatSolveTranspose(S,B,X);CHKERRQ(ierr); 158 } else { 159 ierr = MatMatSolve(S,B,X);CHKERRQ(ierr); 160 } 161 break; 162 case MAT_FACTOR_SCHUR_INVERTED: 163 sizesol = mumps->id.nrhs*mumps->id.size_schur; 164 if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 165 ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr); 166 ierr = PetscMalloc1(sizesol,&mumps->schur_sol);CHKERRQ(ierr); 167 mumps->schur_sizesol = sizesol; 168 } 169 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);CHKERRQ(ierr); 170 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 171 ierr = MatTransposeMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr); 172 } else { 173 ierr = MatMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr); 174 } 175 ierr = MatCopy(X,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 176 break; 177 default: 178 SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %D",F->schur_status); 179 break; 180 } 181 ierr = MatFactorRestoreSchurComplement(F,&S,schurstatus);CHKERRQ(ierr); 182 ierr = MatDestroy(&B);CHKERRQ(ierr); 183 ierr = MatDestroy(&X);CHKERRQ(ierr); 184 PetscFunctionReturn(0); 185 } 186 187 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 188 { 189 Mat_MUMPS *mumps=(Mat_MUMPS*)F->data; 190 PetscErrorCode ierr; 191 192 PetscFunctionBegin; 193 if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 194 PetscFunctionReturn(0); 195 } 196 if (!expansion) { /* prepare for the condensation step */ 197 PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur; 198 /* allocate MUMPS internal array to store reduced right-hand sides */ 199 if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 200 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 201 mumps->id.lredrhs = mumps->id.size_schur; 202 ierr = PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);CHKERRQ(ierr); 203 mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs; 204 } 205 mumps->id.ICNTL(26) = 1; /* condensation phase */ 206 } else { /* prepare for the expansion step */ 207 /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */ 208 ierr = MatMumpsSolveSchur_Private(F);CHKERRQ(ierr); 209 mumps->id.ICNTL(26) = 2; /* expansion phase */ 210 PetscMUMPS_c(mumps); 211 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 212 /* restore defaults */ 213 mumps->id.ICNTL(26) = -1; 214 /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 215 if (mumps->id.nrhs > 1) { 216 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 217 mumps->id.lredrhs = 0; 218 mumps->sizeredrhs = 0; 219 } 220 } 221 PetscFunctionReturn(0); 222 } 223 224 /* 225 MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz] 226 227 input: 228 A - matrix in aij,baij or sbaij (bs=1) format 229 shift - 0: C style output triple; 1: Fortran style output triple. 230 reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 231 MAT_REUSE_MATRIX: only the values in v array are updated 232 output: 233 nnz - dim of r, c, and v (number of local nonzero entries of A) 234 r, c, v - row and col index, matrix values (matrix triples) 235 236 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 237 freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 238 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 239 240 */ 241 242 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 243 { 244 const PetscInt *ai,*aj,*ajj,M=A->rmap->n; 245 PetscInt nz,rnz,i,j; 246 PetscErrorCode ierr; 247 PetscInt *row,*col; 248 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 249 250 PetscFunctionBegin; 251 *v=aa->a; 252 if (reuse == MAT_INITIAL_MATRIX) { 253 nz = aa->nz; 254 ai = aa->i; 255 aj = aa->j; 256 *nnz = nz; 257 ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr); 258 col = row + nz; 259 260 nz = 0; 261 for (i=0; i<M; i++) { 262 rnz = ai[i+1] - ai[i]; 263 ajj = aj + ai[i]; 264 for (j=0; j<rnz; j++) { 265 row[nz] = i+shift; col[nz++] = ajj[j] + shift; 266 } 267 } 268 *r = row; *c = col; 269 } 270 PetscFunctionReturn(0); 271 } 272 273 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 274 { 275 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 276 PetscInt *ptr; 277 278 PetscFunctionBegin; 279 *v = a->val; 280 if (reuse == MAT_INITIAL_MATRIX) { 281 PetscInt nz,i,j,row; 282 PetscErrorCode ierr; 283 284 nz = a->sliidx[a->totalslices]; 285 *nnz = nz; 286 ierr = PetscMalloc1(2*nz, &ptr);CHKERRQ(ierr); 287 *r = ptr; 288 *c = ptr + nz; 289 290 for (i=0; i<a->totalslices; i++) { 291 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 292 *ptr++ = 8*i + row + shift; 293 } 294 } 295 for (i=0;i<nz;i++) *ptr++ = a->colidx[i] + shift; 296 } 297 PetscFunctionReturn(0); 298 } 299 300 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 301 { 302 Mat_SeqBAIJ *aa=(Mat_SeqBAIJ*)A->data; 303 const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2; 304 PetscInt bs,M,nz,idx=0,rnz,i,j,k,m; 305 PetscErrorCode ierr; 306 PetscInt *row,*col; 307 308 PetscFunctionBegin; 309 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 310 M = A->rmap->N/bs; 311 *v = aa->a; 312 if (reuse == MAT_INITIAL_MATRIX) { 313 ai = aa->i; aj = aa->j; 314 nz = bs2*aa->nz; 315 *nnz = nz; 316 ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr); 317 col = row + nz; 318 319 for (i=0; i<M; i++) { 320 ajj = aj + ai[i]; 321 rnz = ai[i+1] - ai[i]; 322 for (k=0; k<rnz; k++) { 323 for (j=0; j<bs; j++) { 324 for (m=0; m<bs; m++) { 325 row[idx] = i*bs + m + shift; 326 col[idx++] = bs*(ajj[k]) + j + shift; 327 } 328 } 329 } 330 } 331 *r = row; *c = col; 332 } 333 PetscFunctionReturn(0); 334 } 335 336 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 337 { 338 const PetscInt *ai, *aj,*ajj,M=A->rmap->n; 339 PetscInt nz,rnz,i,j; 340 PetscErrorCode ierr; 341 PetscInt *row,*col; 342 Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)A->data; 343 344 PetscFunctionBegin; 345 *v = aa->a; 346 if (reuse == MAT_INITIAL_MATRIX) { 347 nz = aa->nz; 348 ai = aa->i; 349 aj = aa->j; 350 *v = aa->a; 351 *nnz = nz; 352 ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr); 353 col = row + nz; 354 355 nz = 0; 356 for (i=0; i<M; i++) { 357 rnz = ai[i+1] - ai[i]; 358 ajj = aj + ai[i]; 359 for (j=0; j<rnz; j++) { 360 row[nz] = i+shift; col[nz++] = ajj[j] + shift; 361 } 362 } 363 *r = row; *c = col; 364 } 365 PetscFunctionReturn(0); 366 } 367 368 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 369 { 370 const PetscInt *ai,*aj,*ajj,*adiag,M=A->rmap->n; 371 PetscInt nz,rnz,i,j; 372 const PetscScalar *av,*v1; 373 PetscScalar *val; 374 PetscErrorCode ierr; 375 PetscInt *row,*col; 376 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 377 PetscBool missing; 378 379 PetscFunctionBegin; 380 ai = aa->i; aj = aa->j; av = aa->a; 381 adiag = aa->diag; 382 ierr = MatMissingDiagonal_SeqAIJ(A,&missing,&i);CHKERRQ(ierr); 383 if (reuse == MAT_INITIAL_MATRIX) { 384 /* count nz in the upper triangular part of A */ 385 nz = 0; 386 if (missing) { 387 for (i=0; i<M; i++) { 388 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 389 for (j=ai[i];j<ai[i+1];j++) { 390 if (aj[j] < i) continue; 391 nz++; 392 } 393 } else { 394 nz += ai[i+1] - adiag[i]; 395 } 396 } 397 } else { 398 for (i=0; i<M; i++) nz += ai[i+1] - adiag[i]; 399 } 400 *nnz = nz; 401 402 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 403 col = row + nz; 404 val = (PetscScalar*)(col + nz); 405 406 nz = 0; 407 if (missing) { 408 for (i=0; i<M; i++) { 409 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 410 for (j=ai[i];j<ai[i+1];j++) { 411 if (aj[j] < i) continue; 412 row[nz] = i+shift; 413 col[nz] = aj[j]+shift; 414 val[nz] = av[j]; 415 nz++; 416 } 417 } else { 418 rnz = ai[i+1] - adiag[i]; 419 ajj = aj + adiag[i]; 420 v1 = av + adiag[i]; 421 for (j=0; j<rnz; j++) { 422 row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j]; 423 } 424 } 425 } 426 } else { 427 for (i=0; i<M; i++) { 428 rnz = ai[i+1] - adiag[i]; 429 ajj = aj + adiag[i]; 430 v1 = av + adiag[i]; 431 for (j=0; j<rnz; j++) { 432 row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j]; 433 } 434 } 435 } 436 *r = row; *c = col; *v = val; 437 } else { 438 nz = 0; val = *v; 439 if (missing) { 440 for (i=0; i <M; i++) { 441 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 442 for (j=ai[i];j<ai[i+1];j++) { 443 if (aj[j] < i) continue; 444 val[nz++] = av[j]; 445 } 446 } else { 447 rnz = ai[i+1] - adiag[i]; 448 v1 = av + adiag[i]; 449 for (j=0; j<rnz; j++) { 450 val[nz++] = v1[j]; 451 } 452 } 453 } 454 } else { 455 for (i=0; i <M; i++) { 456 rnz = ai[i+1] - adiag[i]; 457 v1 = av + adiag[i]; 458 for (j=0; j<rnz; j++) { 459 val[nz++] = v1[j]; 460 } 461 } 462 } 463 } 464 PetscFunctionReturn(0); 465 } 466 467 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 468 { 469 const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 470 PetscErrorCode ierr; 471 PetscInt rstart,nz,i,j,jj,irow,countA,countB; 472 PetscInt *row,*col; 473 const PetscScalar *av, *bv,*v1,*v2; 474 PetscScalar *val; 475 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data; 476 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)(mat->A)->data; 477 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 478 479 PetscFunctionBegin; 480 ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap->rstart; 481 av=aa->a; bv=bb->a; 482 483 garray = mat->garray; 484 485 if (reuse == MAT_INITIAL_MATRIX) { 486 nz = aa->nz + bb->nz; 487 *nnz = nz; 488 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 489 col = row + nz; 490 val = (PetscScalar*)(col + nz); 491 492 *r = row; *c = col; *v = val; 493 } else { 494 row = *r; col = *c; val = *v; 495 } 496 497 jj = 0; irow = rstart; 498 for (i=0; i<m; i++) { 499 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 500 countA = ai[i+1] - ai[i]; 501 countB = bi[i+1] - bi[i]; 502 bjj = bj + bi[i]; 503 v1 = av + ai[i]; 504 v2 = bv + bi[i]; 505 506 /* A-part */ 507 for (j=0; j<countA; j++) { 508 if (reuse == MAT_INITIAL_MATRIX) { 509 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 510 } 511 val[jj++] = v1[j]; 512 } 513 514 /* B-part */ 515 for (j=0; j < countB; j++) { 516 if (reuse == MAT_INITIAL_MATRIX) { 517 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 518 } 519 val[jj++] = v2[j]; 520 } 521 irow++; 522 } 523 PetscFunctionReturn(0); 524 } 525 526 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 527 { 528 const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 529 PetscErrorCode ierr; 530 PetscInt rstart,nz,i,j,jj,irow,countA,countB; 531 PetscInt *row,*col; 532 const PetscScalar *av, *bv,*v1,*v2; 533 PetscScalar *val; 534 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 535 Mat_SeqAIJ *aa = (Mat_SeqAIJ*)(mat->A)->data; 536 Mat_SeqAIJ *bb = (Mat_SeqAIJ*)(mat->B)->data; 537 538 PetscFunctionBegin; 539 ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap->rstart; 540 av=aa->a; bv=bb->a; 541 542 garray = mat->garray; 543 544 if (reuse == MAT_INITIAL_MATRIX) { 545 nz = aa->nz + bb->nz; 546 *nnz = nz; 547 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 548 col = row + nz; 549 val = (PetscScalar*)(col + nz); 550 551 *r = row; *c = col; *v = val; 552 } else { 553 row = *r; col = *c; val = *v; 554 } 555 556 jj = 0; irow = rstart; 557 for (i=0; i<m; i++) { 558 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 559 countA = ai[i+1] - ai[i]; 560 countB = bi[i+1] - bi[i]; 561 bjj = bj + bi[i]; 562 v1 = av + ai[i]; 563 v2 = bv + bi[i]; 564 565 /* A-part */ 566 for (j=0; j<countA; j++) { 567 if (reuse == MAT_INITIAL_MATRIX) { 568 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 569 } 570 val[jj++] = v1[j]; 571 } 572 573 /* B-part */ 574 for (j=0; j < countB; j++) { 575 if (reuse == MAT_INITIAL_MATRIX) { 576 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 577 } 578 val[jj++] = v2[j]; 579 } 580 irow++; 581 } 582 PetscFunctionReturn(0); 583 } 584 585 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 586 { 587 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)A->data; 588 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ*)(mat->A)->data; 589 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 590 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj; 591 const PetscInt *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart; 592 const PetscInt bs2=mat->bs2; 593 PetscErrorCode ierr; 594 PetscInt bs,nz,i,j,k,n,jj,irow,countA,countB,idx; 595 PetscInt *row,*col; 596 const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2; 597 PetscScalar *val; 598 599 PetscFunctionBegin; 600 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 601 if (reuse == MAT_INITIAL_MATRIX) { 602 nz = bs2*(aa->nz + bb->nz); 603 *nnz = nz; 604 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 605 col = row + nz; 606 val = (PetscScalar*)(col + nz); 607 608 *r = row; *c = col; *v = val; 609 } else { 610 row = *r; col = *c; val = *v; 611 } 612 613 jj = 0; irow = rstart; 614 for (i=0; i<mbs; i++) { 615 countA = ai[i+1] - ai[i]; 616 countB = bi[i+1] - bi[i]; 617 ajj = aj + ai[i]; 618 bjj = bj + bi[i]; 619 v1 = av + bs2*ai[i]; 620 v2 = bv + bs2*bi[i]; 621 622 idx = 0; 623 /* A-part */ 624 for (k=0; k<countA; k++) { 625 for (j=0; j<bs; j++) { 626 for (n=0; n<bs; n++) { 627 if (reuse == MAT_INITIAL_MATRIX) { 628 row[jj] = irow + n + shift; 629 col[jj] = rstart + bs*ajj[k] + j + shift; 630 } 631 val[jj++] = v1[idx++]; 632 } 633 } 634 } 635 636 idx = 0; 637 /* B-part */ 638 for (k=0; k<countB; k++) { 639 for (j=0; j<bs; j++) { 640 for (n=0; n<bs; n++) { 641 if (reuse == MAT_INITIAL_MATRIX) { 642 row[jj] = irow + n + shift; 643 col[jj] = bs*garray[bjj[k]] + j + shift; 644 } 645 val[jj++] = v2[idx++]; 646 } 647 } 648 } 649 irow += bs; 650 } 651 PetscFunctionReturn(0); 652 } 653 654 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 655 { 656 const PetscInt *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 657 PetscErrorCode ierr; 658 PetscInt rstart,nz,nza,nzb,i,j,jj,irow,countA,countB; 659 PetscInt *row,*col; 660 const PetscScalar *av, *bv,*v1,*v2; 661 PetscScalar *val; 662 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 663 Mat_SeqAIJ *aa =(Mat_SeqAIJ*)(mat->A)->data; 664 Mat_SeqAIJ *bb =(Mat_SeqAIJ*)(mat->B)->data; 665 666 PetscFunctionBegin; 667 ai=aa->i; aj=aa->j; adiag=aa->diag; 668 bi=bb->i; bj=bb->j; garray = mat->garray; 669 av=aa->a; bv=bb->a; 670 671 rstart = A->rmap->rstart; 672 673 if (reuse == MAT_INITIAL_MATRIX) { 674 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 675 nzb = 0; /* num of upper triangular entries in mat->B */ 676 for (i=0; i<m; i++) { 677 nza += (ai[i+1] - adiag[i]); 678 countB = bi[i+1] - bi[i]; 679 bjj = bj + bi[i]; 680 for (j=0; j<countB; j++) { 681 if (garray[bjj[j]] > rstart) nzb++; 682 } 683 } 684 685 nz = nza + nzb; /* total nz of upper triangular part of mat */ 686 *nnz = nz; 687 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 688 col = row + nz; 689 val = (PetscScalar*)(col + nz); 690 691 *r = row; *c = col; *v = val; 692 } else { 693 row = *r; col = *c; val = *v; 694 } 695 696 jj = 0; irow = rstart; 697 for (i=0; i<m; i++) { 698 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 699 v1 = av + adiag[i]; 700 countA = ai[i+1] - adiag[i]; 701 countB = bi[i+1] - bi[i]; 702 bjj = bj + bi[i]; 703 v2 = bv + bi[i]; 704 705 /* A-part */ 706 for (j=0; j<countA; j++) { 707 if (reuse == MAT_INITIAL_MATRIX) { 708 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 709 } 710 val[jj++] = v1[j]; 711 } 712 713 /* B-part */ 714 for (j=0; j < countB; j++) { 715 if (garray[bjj[j]] > rstart) { 716 if (reuse == MAT_INITIAL_MATRIX) { 717 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 718 } 719 val[jj++] = v2[j]; 720 } 721 } 722 irow++; 723 } 724 PetscFunctionReturn(0); 725 } 726 727 PetscErrorCode MatDestroy_MUMPS(Mat A) 728 { 729 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 730 PetscErrorCode ierr; 731 732 PetscFunctionBegin; 733 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 734 ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr); 735 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 736 ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr); 737 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 738 ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr); 739 ierr = PetscFree(mumps->irn);CHKERRQ(ierr); 740 ierr = PetscFree(mumps->info);CHKERRQ(ierr); 741 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 742 mumps->id.job = JOB_END; 743 PetscMUMPS_c(mumps); 744 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 745 if (mumps->use_petsc_omp_support) { ierr = PetscOmpCtrlDestroy(&mumps->omp_ctrl);CHKERRQ(ierr); } 746 #endif 747 ierr = PetscFree2(mumps->recvcount,mumps->displs);CHKERRQ(ierr); 748 ierr = PetscFree(A->data);CHKERRQ(ierr); 749 750 /* clear composed functions */ 751 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr); 752 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr); 753 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr); 754 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr); 755 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);CHKERRQ(ierr); 756 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr); 757 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);CHKERRQ(ierr); 758 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);CHKERRQ(ierr); 759 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);CHKERRQ(ierr); 760 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);CHKERRQ(ierr); 761 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);CHKERRQ(ierr); 762 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);CHKERRQ(ierr); 763 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);CHKERRQ(ierr); 764 PetscFunctionReturn(0); 765 } 766 767 PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x) 768 { 769 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 770 PetscScalar *array; 771 Vec b_seq; 772 IS is_iden,is_petsc; 773 PetscErrorCode ierr; 774 PetscInt i; 775 PetscBool second_solve = PETSC_FALSE; 776 static PetscBool cite1 = PETSC_FALSE,cite2 = PETSC_FALSE; 777 778 PetscFunctionBegin; 779 ierr = PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1);CHKERRQ(ierr); 780 ierr = PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",&cite2);CHKERRQ(ierr); 781 782 if (A->factorerrortype) { 783 ierr = PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 784 ierr = VecSetInf(x);CHKERRQ(ierr); 785 PetscFunctionReturn(0); 786 } 787 788 mumps->id.ICNTL(20)= 0; /* dense RHS */ 789 mumps->id.nrhs = 1; 790 b_seq = mumps->b_seq; 791 if (mumps->petsc_size > 1) { 792 /* MUMPS only supports centralized rhs. Scatter b into a seqential rhs vector */ 793 ierr = VecScatterBegin(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 794 ierr = VecScatterEnd(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 795 if (!mumps->myid) {ierr = VecGetArray(b_seq,&array);CHKERRQ(ierr);} 796 } else { /* petsc_size == 1 */ 797 ierr = VecCopy(b,x);CHKERRQ(ierr); 798 ierr = VecGetArray(x,&array);CHKERRQ(ierr); 799 } 800 if (!mumps->myid) { /* define rhs on the host */ 801 mumps->id.nrhs = 1; 802 mumps->id.rhs = (MumpsScalar*)array; 803 } 804 805 /* 806 handle condensation step of Schur complement (if any) 807 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 808 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 809 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 810 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 811 */ 812 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 813 if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n"); 814 second_solve = PETSC_TRUE; 815 ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr); 816 } 817 /* solve phase */ 818 /*-------------*/ 819 mumps->id.job = JOB_SOLVE; 820 PetscMUMPS_c(mumps); 821 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 822 823 /* handle expansion step of Schur complement (if any) */ 824 if (second_solve) { 825 ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr); 826 } 827 828 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 829 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 830 /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 831 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 832 } 833 if (!mumps->scat_sol) { /* create scatter scat_sol */ 834 ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */ 835 for (i=0; i<mumps->id.lsol_loc; i++) { 836 mumps->id.isol_loc[i] -= 1; /* change Fortran style to C style */ 837 } 838 ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,mumps->id.isol_loc,PETSC_COPY_VALUES,&is_petsc);CHKERRQ(ierr); /* to */ 839 ierr = VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr); 840 ierr = ISDestroy(&is_iden);CHKERRQ(ierr); 841 ierr = ISDestroy(&is_petsc);CHKERRQ(ierr); 842 843 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 844 } 845 846 ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 847 ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 848 } 849 850 if (mumps->petsc_size > 1) {if (!mumps->myid) {ierr = VecRestoreArray(b_seq,&array);CHKERRQ(ierr);}} 851 else {ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);} 852 853 ierr = PetscLogFlops(2.0*mumps->id.RINFO(3));CHKERRQ(ierr); 854 PetscFunctionReturn(0); 855 } 856 857 PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x) 858 { 859 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 860 PetscErrorCode ierr; 861 862 PetscFunctionBegin; 863 mumps->id.ICNTL(9) = 0; 864 ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr); 865 mumps->id.ICNTL(9) = 1; 866 PetscFunctionReturn(0); 867 } 868 869 PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X) 870 { 871 PetscErrorCode ierr; 872 Mat Bt = NULL; 873 PetscBool flg, flgT; 874 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 875 PetscInt i,nrhs,M; 876 PetscScalar *array; 877 const PetscScalar *rbray; 878 PetscInt lsol_loc,nlsol_loc,*isol_loc,*idxx,*isol_loc_save,iidx = 0; 879 PetscScalar *bray,*sol_loc,*sol_loc_save; 880 IS is_to,is_from; 881 PetscInt k,proc,j,m,myrstart; 882 const PetscInt *rstart; 883 Vec v_mpi,b_seq,msol_loc; 884 VecScatter scat_rhs,scat_sol; 885 PetscScalar *aa; 886 PetscInt spnr,*ia,*ja; 887 Mat_MPIAIJ *b = NULL; 888 889 PetscFunctionBegin; 890 ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 891 if (!flg) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix"); 892 893 ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 894 if (flg) { /* dense B */ 895 if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution"); 896 mumps->id.ICNTL(20)= 0; /* dense RHS */ 897 } else { /* sparse B */ 898 ierr = PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);CHKERRQ(ierr); 899 if (flgT) { /* input B is transpose of actural RHS matrix, 900 because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */ 901 ierr = MatTransposeGetMat(B,&Bt);CHKERRQ(ierr); 902 } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix"); 903 mumps->id.ICNTL(20)= 1; /* sparse RHS */ 904 } 905 906 ierr = MatGetSize(B,&M,&nrhs);CHKERRQ(ierr); 907 mumps->id.nrhs = nrhs; 908 mumps->id.lrhs = M; 909 mumps->id.rhs = NULL; 910 911 if (mumps->petsc_size == 1) { 912 PetscScalar *aa; 913 PetscInt spnr,*ia,*ja; 914 PetscBool second_solve = PETSC_FALSE; 915 916 ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr); 917 mumps->id.rhs = (MumpsScalar*)array; 918 919 if (!Bt) { /* dense B */ 920 /* copy B to X */ 921 ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr); 922 ierr = PetscArraycpy(array,rbray,M*nrhs);CHKERRQ(ierr); 923 ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr); 924 } else { /* sparse B */ 925 ierr = MatSeqAIJGetArray(Bt,&aa);CHKERRQ(ierr); 926 ierr = MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 927 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 928 /* mumps requires ia and ja start at 1! */ 929 mumps->id.irhs_ptr = ia; 930 mumps->id.irhs_sparse = ja; 931 mumps->id.nz_rhs = ia[spnr] - 1; 932 mumps->id.rhs_sparse = (MumpsScalar*)aa; 933 } 934 /* handle condensation step of Schur complement (if any) */ 935 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 936 second_solve = PETSC_TRUE; 937 ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr); 938 } 939 /* solve phase */ 940 /*-------------*/ 941 mumps->id.job = JOB_SOLVE; 942 PetscMUMPS_c(mumps); 943 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 944 945 /* handle expansion step of Schur complement (if any) */ 946 if (second_solve) { 947 ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr); 948 } 949 if (Bt) { /* sparse B */ 950 ierr = MatSeqAIJRestoreArray(Bt,&aa);CHKERRQ(ierr); 951 ierr = MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 952 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 953 } 954 ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr); 955 PetscFunctionReturn(0); 956 } 957 958 /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/ 959 if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n"); 960 961 /* create msol_loc to hold mumps local solution */ 962 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 963 sol_loc_save = (PetscScalar*)mumps->id.sol_loc; 964 965 lsol_loc = mumps->id.lsol_loc; 966 nlsol_loc = nrhs*lsol_loc; /* length of sol_loc */ 967 ierr = PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);CHKERRQ(ierr); 968 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 969 mumps->id.isol_loc = isol_loc; 970 971 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);CHKERRQ(ierr); 972 973 if (!Bt) { /* dense B */ 974 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 975 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 976 0, re-arrange B into desired order, which is a local operation. 977 */ 978 979 /* scatter v_mpi to b_seq because MUMPS only supports centralized rhs */ 980 /* wrap dense rhs matrix B into a vector v_mpi */ 981 ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr); 982 ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr); 983 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr); 984 ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr); 985 986 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */ 987 if (!mumps->myid) { 988 PetscInt *idx; 989 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 990 ierr = PetscMalloc1(nrhs*M,&idx);CHKERRQ(ierr); 991 ierr = MatGetOwnershipRanges(B,&rstart);CHKERRQ(ierr); 992 k = 0; 993 for (proc=0; proc<mumps->petsc_size; proc++){ 994 for (j=0; j<nrhs; j++){ 995 for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i; 996 } 997 } 998 999 ierr = VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);CHKERRQ(ierr); 1000 ierr = ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr); 1001 ierr = ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);CHKERRQ(ierr); 1002 } else { 1003 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);CHKERRQ(ierr); 1004 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);CHKERRQ(ierr); 1005 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);CHKERRQ(ierr); 1006 } 1007 ierr = VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);CHKERRQ(ierr); 1008 ierr = VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1009 ierr = ISDestroy(&is_to);CHKERRQ(ierr); 1010 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1011 ierr = VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1012 1013 if (!mumps->myid) { /* define rhs on the host */ 1014 ierr = VecGetArray(b_seq,&bray);CHKERRQ(ierr); 1015 mumps->id.rhs = (MumpsScalar*)bray; 1016 ierr = VecRestoreArray(b_seq,&bray);CHKERRQ(ierr); 1017 } 1018 1019 } else { /* sparse B */ 1020 b = (Mat_MPIAIJ*)Bt->data; 1021 1022 /* wrap dense X into a vector v_mpi */ 1023 ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr); 1024 ierr = MatDenseGetArray(X,&bray);CHKERRQ(ierr); 1025 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr); 1026 ierr = MatDenseRestoreArray(X,&bray);CHKERRQ(ierr); 1027 1028 if (!mumps->myid) { 1029 ierr = MatSeqAIJGetArray(b->A,&aa);CHKERRQ(ierr); 1030 ierr = MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1031 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 1032 /* mumps requires ia and ja start at 1! */ 1033 mumps->id.irhs_ptr = ia; 1034 mumps->id.irhs_sparse = ja; 1035 mumps->id.nz_rhs = ia[spnr] - 1; 1036 mumps->id.rhs_sparse = (MumpsScalar*)aa; 1037 } else { 1038 mumps->id.irhs_ptr = NULL; 1039 mumps->id.irhs_sparse = NULL; 1040 mumps->id.nz_rhs = 0; 1041 mumps->id.rhs_sparse = NULL; 1042 } 1043 } 1044 1045 /* solve phase */ 1046 /*-------------*/ 1047 mumps->id.job = JOB_SOLVE; 1048 PetscMUMPS_c(mumps); 1049 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1050 1051 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1052 ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr); 1053 ierr = VecPlaceArray(v_mpi,array);CHKERRQ(ierr); 1054 1055 /* create scatter scat_sol */ 1056 ierr = MatGetOwnershipRanges(X,&rstart);CHKERRQ(ierr); 1057 /* iidx: index for scatter mumps solution to petsc X */ 1058 1059 ierr = ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);CHKERRQ(ierr); 1060 ierr = PetscMalloc1(nlsol_loc,&idxx);CHKERRQ(ierr); 1061 for (i=0; i<lsol_loc; i++) { 1062 isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1063 1064 for (proc=0; proc<mumps->petsc_size; proc++){ 1065 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) { 1066 myrstart = rstart[proc]; 1067 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1068 iidx = k + myrstart*nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1069 m = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */ 1070 break; 1071 } 1072 } 1073 1074 for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m; 1075 } 1076 ierr = ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr); 1077 ierr = VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);CHKERRQ(ierr); 1078 ierr = VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1079 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1080 ierr = ISDestroy(&is_to);CHKERRQ(ierr); 1081 ierr = VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1082 ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr); 1083 1084 /* free spaces */ 1085 mumps->id.sol_loc = (MumpsScalar*)sol_loc_save; 1086 mumps->id.isol_loc = isol_loc_save; 1087 1088 ierr = PetscFree2(sol_loc,isol_loc);CHKERRQ(ierr); 1089 ierr = PetscFree(idxx);CHKERRQ(ierr); 1090 ierr = VecDestroy(&msol_loc);CHKERRQ(ierr); 1091 ierr = VecDestroy(&v_mpi);CHKERRQ(ierr); 1092 if (Bt) { 1093 if (!mumps->myid) { 1094 b = (Mat_MPIAIJ*)Bt->data; 1095 ierr = MatSeqAIJRestoreArray(b->A,&aa);CHKERRQ(ierr); 1096 ierr = MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1097 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 1098 } 1099 } else { 1100 ierr = VecDestroy(&b_seq);CHKERRQ(ierr); 1101 ierr = VecScatterDestroy(&scat_rhs);CHKERRQ(ierr); 1102 } 1103 ierr = VecScatterDestroy(&scat_sol);CHKERRQ(ierr); 1104 ierr = PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));CHKERRQ(ierr); 1105 PetscFunctionReturn(0); 1106 } 1107 1108 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X) 1109 { 1110 PetscErrorCode ierr; 1111 PetscBool flg; 1112 Mat B; 1113 1114 PetscFunctionBegin; 1115 ierr = PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 1116 if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix"); 1117 1118 /* Create B=Bt^T that uses Bt's data structure */ 1119 ierr = MatCreateTranspose(Bt,&B);CHKERRQ(ierr); 1120 1121 ierr = MatMatSolve_MUMPS(A,B,X);CHKERRQ(ierr); 1122 ierr = MatDestroy(&B);CHKERRQ(ierr); 1123 PetscFunctionReturn(0); 1124 } 1125 1126 #if !defined(PETSC_USE_COMPLEX) 1127 /* 1128 input: 1129 F: numeric factor 1130 output: 1131 nneg: total number of negative pivots 1132 nzero: total number of zero pivots 1133 npos: (global dimension of F) - nneg - nzero 1134 */ 1135 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos) 1136 { 1137 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1138 PetscErrorCode ierr; 1139 PetscMPIInt size; 1140 1141 PetscFunctionBegin; 1142 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRQ(ierr); 1143 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1144 if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",mumps->id.INFOG(13)); 1145 1146 if (nneg) *nneg = mumps->id.INFOG(12); 1147 if (nzero || npos) { 1148 if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1149 if (nzero) *nzero = mumps->id.INFOG(28); 1150 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1151 } 1152 PetscFunctionReturn(0); 1153 } 1154 #endif 1155 1156 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps) 1157 { 1158 PetscErrorCode ierr; 1159 PetscInt i,nz=0,*irn,*jcn=0; 1160 PetscScalar *val=0; 1161 PetscMPIInt mpinz,*recvcount=NULL,*displs=NULL; 1162 1163 PetscFunctionBegin; 1164 if (mumps->omp_comm_size > 1) { 1165 if (reuse == MAT_INITIAL_MATRIX) { 1166 /* master first gathers counts of nonzeros to receive */ 1167 if (mumps->is_omp_master) { ierr = PetscMalloc2(mumps->omp_comm_size,&recvcount,mumps->omp_comm_size,&displs);CHKERRQ(ierr); } 1168 ierr = PetscMPIIntCast(mumps->nz,&mpinz);CHKERRQ(ierr); 1169 ierr = MPI_Gather(&mpinz,1,MPI_INT,recvcount,1,MPI_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1170 1171 /* master allocates memory to receive nonzeros */ 1172 if (mumps->is_omp_master) { 1173 displs[0] = 0; 1174 for (i=1; i<mumps->omp_comm_size; i++) displs[i] = displs[i-1] + recvcount[i-1]; 1175 nz = displs[mumps->omp_comm_size-1] + recvcount[mumps->omp_comm_size-1]; 1176 ierr = PetscMalloc(2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar),&irn);CHKERRQ(ierr); 1177 jcn = irn + nz; 1178 val = (PetscScalar*)(jcn + nz); 1179 } 1180 1181 /* save the gatherv plan */ 1182 mumps->mpinz = mpinz; /* used as send count */ 1183 mumps->recvcount = recvcount; 1184 mumps->displs = displs; 1185 1186 /* master gathers nonzeros */ 1187 ierr = MPI_Gatherv(mumps->irn,mpinz,MPIU_INT,irn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1188 ierr = MPI_Gatherv(mumps->jcn,mpinz,MPIU_INT,jcn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1189 ierr = MPI_Gatherv(mumps->val,mpinz,MPIU_SCALAR,val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1190 1191 /* master frees its row/col/val and replaces them with bigger arrays */ 1192 if (mumps->is_omp_master) { 1193 ierr = PetscFree(mumps->irn);CHKERRQ(ierr); /* irn/jcn/val are allocated together so free only irn */ 1194 mumps->nz = nz; /* it is a sum of mpinz over omp_comm */ 1195 mumps->irn = irn; 1196 mumps->jcn = jcn; 1197 mumps->val = val; 1198 } 1199 } else { 1200 ierr = MPI_Gatherv((mumps->is_omp_master?MPI_IN_PLACE:mumps->val),mumps->mpinz,MPIU_SCALAR,mumps->val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1201 } 1202 } 1203 PetscFunctionReturn(0); 1204 } 1205 1206 PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info) 1207 { 1208 Mat_MUMPS *mumps =(Mat_MUMPS*)(F)->data; 1209 PetscErrorCode ierr; 1210 PetscBool isMPIAIJ; 1211 1212 PetscFunctionBegin; 1213 if (mumps->id.INFOG(1) < 0) { 1214 if (mumps->id.INFOG(1) == -6) { 1215 ierr = PetscInfo2(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1216 } 1217 ierr = PetscInfo2(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1218 PetscFunctionReturn(0); 1219 } 1220 1221 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1222 ierr = MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);CHKERRQ(ierr); 1223 1224 /* numerical factorization phase */ 1225 /*-------------------------------*/ 1226 mumps->id.job = JOB_FACTNUMERIC; 1227 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1228 if (!mumps->myid) { 1229 mumps->id.a = (MumpsScalar*)mumps->val; 1230 } 1231 } else { 1232 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1233 } 1234 PetscMUMPS_c(mumps); 1235 if (mumps->id.INFOG(1) < 0) { 1236 if (A->erroriffailure) { 1237 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 1238 } else { 1239 if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */ 1240 ierr = PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1241 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1242 } else if (mumps->id.INFOG(1) == -13) { 1243 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1244 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1245 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10) ) { 1246 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray \n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1247 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1248 } else { 1249 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1250 F->factorerrortype = MAT_FACTOR_OTHER; 1251 } 1252 } 1253 } 1254 if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d\n",mumps->id.INFOG(16)); 1255 1256 F->assembled = PETSC_TRUE; 1257 mumps->matstruc = SAME_NONZERO_PATTERN; 1258 if (F->schur) { /* reset Schur status to unfactored */ 1259 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1260 mumps->id.ICNTL(19) = 2; 1261 ierr = MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);CHKERRQ(ierr); 1262 } 1263 ierr = MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);CHKERRQ(ierr); 1264 } 1265 1266 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 1267 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 1268 1269 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 1270 if (mumps->petsc_size > 1) { 1271 PetscInt lsol_loc; 1272 PetscScalar *sol_loc; 1273 1274 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr); 1275 1276 /* distributed solution; Create x_seq=sol_loc for repeated use */ 1277 if (mumps->x_seq) { 1278 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 1279 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 1280 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 1281 } 1282 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1283 ierr = PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);CHKERRQ(ierr); 1284 mumps->id.lsol_loc = lsol_loc; 1285 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1286 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr); 1287 } 1288 ierr = PetscLogFlops(mumps->id.RINFO(2));CHKERRQ(ierr); 1289 PetscFunctionReturn(0); 1290 } 1291 1292 /* Sets MUMPS options from the options database */ 1293 PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A) 1294 { 1295 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1296 PetscErrorCode ierr; 1297 PetscInt icntl,info[80],i,ninfo=80; 1298 PetscBool flg; 1299 1300 PetscFunctionBegin; 1301 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 1302 ierr = PetscOptionsInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr); 1303 if (flg) mumps->id.ICNTL(1) = icntl; 1304 ierr = PetscOptionsInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr); 1305 if (flg) mumps->id.ICNTL(2) = icntl; 1306 ierr = PetscOptionsInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr); 1307 if (flg) mumps->id.ICNTL(3) = icntl; 1308 1309 ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr); 1310 if (flg) mumps->id.ICNTL(4) = icntl; 1311 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 1312 1313 ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr); 1314 if (flg) mumps->id.ICNTL(6) = icntl; 1315 1316 ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr); 1317 if (flg) { 1318 if (icntl== 1 && mumps->petsc_size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n"); 1319 else mumps->id.ICNTL(7) = icntl; 1320 } 1321 1322 ierr = PetscOptionsInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr); 1323 /* ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL);CHKERRQ(ierr); handled by MatSolveTranspose_MUMPS() */ 1324 ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr); 1325 ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr); 1326 ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr); 1327 ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr); 1328 ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr); 1329 ierr = PetscOptionsInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr); 1330 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 1331 ierr = MatDestroy(&F->schur);CHKERRQ(ierr); 1332 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 1333 } 1334 /* ierr = PetscOptionsInt("-mat_mumps_icntl_20","ICNTL(20): the format (dense or sparse) of the right-hand sides","None",mumps->id.ICNTL(20),&mumps->id.ICNTL(20),NULL);CHKERRQ(ierr); -- sparse rhs is not supported in PETSc API */ 1335 /* ierr = PetscOptionsInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL);CHKERRQ(ierr); we only use distributed solution vector */ 1336 1337 ierr = PetscOptionsInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr); 1338 ierr = PetscOptionsInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr); 1339 ierr = PetscOptionsInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr); 1340 if (mumps->id.ICNTL(24)) { 1341 mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ 1342 } 1343 1344 ierr = PetscOptionsInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr); 1345 ierr = PetscOptionsInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr); 1346 ierr = PetscOptionsInt("-mat_mumps_icntl_27","ICNTL(27): the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr); 1347 ierr = PetscOptionsInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr); 1348 ierr = PetscOptionsInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr); 1349 /* ierr = PetscOptionsInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */ 1350 ierr = PetscOptionsInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr); 1351 /* ierr = PetscOptionsInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr); -- not supported by PETSc API */ 1352 ierr = PetscOptionsInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr); 1353 ierr = PetscOptionsInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr); 1354 ierr = PetscOptionsInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);CHKERRQ(ierr); 1355 ierr = PetscOptionsInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);CHKERRQ(ierr); 1356 1357 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr); 1358 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr); 1359 ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr); 1360 ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr); 1361 ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr); 1362 ierr = PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);CHKERRQ(ierr); 1363 1364 ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, 256, NULL);CHKERRQ(ierr); 1365 1366 ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr); 1367 if (ninfo) { 1368 if (ninfo > 80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %d must <= 80\n",ninfo); 1369 ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr); 1370 mumps->ninfo = ninfo; 1371 for (i=0; i<ninfo; i++) { 1372 if (info[i] < 0 || info[i]>80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %d must between 1 and 80\n",ninfo); 1373 else mumps->info[i] = info[i]; 1374 } 1375 } 1376 1377 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1378 PetscFunctionReturn(0); 1379 } 1380 1381 PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps) 1382 { 1383 PetscErrorCode ierr; 1384 PetscInt nthreads=0; 1385 1386 PetscFunctionBegin; 1387 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1388 ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRQ(ierr); 1389 ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRQ(ierr); /* so that code like "if (!myid)" still works even if mumps_comm is different */ 1390 1391 ierr = PetscOptionsHasName(NULL,NULL,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr); 1392 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1393 ierr = PetscOptionsGetInt(NULL,NULL,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr); 1394 if (mumps->use_petsc_omp_support) { 1395 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1396 ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr); 1397 ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr); 1398 #else 1399 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -mat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual\n"); 1400 #endif 1401 } else { 1402 mumps->omp_comm = PETSC_COMM_SELF; 1403 mumps->mumps_comm = mumps->petsc_comm; 1404 mumps->is_omp_master = PETSC_TRUE; 1405 } 1406 ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRQ(ierr); 1407 1408 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 1409 mumps->id.job = JOB_INIT; 1410 mumps->id.par = 1; /* host participates factorizaton and solve */ 1411 mumps->id.sym = mumps->sym; 1412 1413 PetscMUMPS_c(mumps); 1414 1415 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 1416 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 1417 */ 1418 ierr = MPI_Bcast(mumps->id.icntl,60,MPIU_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 Manual Section 9 */ 1419 ierr = MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); 1420 1421 mumps->scat_rhs = NULL; 1422 mumps->scat_sol = NULL; 1423 1424 /* set PETSc-MUMPS default options - override MUMPS default */ 1425 mumps->id.ICNTL(3) = 0; 1426 mumps->id.ICNTL(4) = 0; 1427 if (mumps->petsc_size == 1) { 1428 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 1429 } else { 1430 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 1431 mumps->id.ICNTL(20) = 0; /* rhs is in dense format */ 1432 mumps->id.ICNTL(21) = 1; /* distributed solution */ 1433 } 1434 1435 /* schur */ 1436 mumps->id.size_schur = 0; 1437 mumps->id.listvar_schur = NULL; 1438 mumps->id.schur = NULL; 1439 mumps->sizeredrhs = 0; 1440 mumps->schur_sol = NULL; 1441 mumps->schur_sizesol = 0; 1442 PetscFunctionReturn(0); 1443 } 1444 1445 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps) 1446 { 1447 PetscErrorCode ierr; 1448 1449 PetscFunctionBegin; 1450 ierr = MPI_Bcast(mumps->id.infog, 80,MPIU_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 manual p82 */ 1451 ierr = MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); 1452 if (mumps->id.INFOG(1) < 0) { 1453 if (A->erroriffailure) { 1454 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1455 } else { 1456 if (mumps->id.INFOG(1) == -6) { 1457 ierr = PetscInfo2(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1458 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 1459 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 1460 ierr = PetscInfo2(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1461 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1462 } else { 1463 ierr = PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1464 F->factorerrortype = MAT_FACTOR_OTHER; 1465 } 1466 } 1467 } 1468 PetscFunctionReturn(0); 1469 } 1470 1471 /* Note Petsc r(=c) permutation is used when mumps->id.ICNTL(7)==1 with centralized assembled matrix input; otherwise r and c are ignored */ 1472 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1473 { 1474 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1475 PetscErrorCode ierr; 1476 Vec b; 1477 const PetscInt M = A->rmap->N; 1478 1479 PetscFunctionBegin; 1480 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1481 1482 /* Set MUMPS options from the options database */ 1483 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1484 1485 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1486 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1487 1488 /* analysis phase */ 1489 /*----------------*/ 1490 mumps->id.job = JOB_FACTSYMBOLIC; 1491 mumps->id.n = M; 1492 switch (mumps->id.ICNTL(18)) { 1493 case 0: /* centralized assembled matrix input */ 1494 if (!mumps->myid) { 1495 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1496 if (mumps->id.ICNTL(6)>1) { 1497 mumps->id.a = (MumpsScalar*)mumps->val; 1498 } 1499 if (mumps->id.ICNTL(7) == 1) { /* use user-provide matrix ordering - assuming r = c ordering */ 1500 /* 1501 PetscBool flag; 1502 ierr = ISEqual(r,c,&flag);CHKERRQ(ierr); 1503 if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"row_perm != col_perm"); 1504 ierr = ISView(r,PETSC_VIEWER_STDOUT_SELF); 1505 */ 1506 if (!mumps->myid) { 1507 const PetscInt *idx; 1508 PetscInt i,*perm_in; 1509 1510 ierr = PetscMalloc1(M,&perm_in);CHKERRQ(ierr); 1511 ierr = ISGetIndices(r,&idx);CHKERRQ(ierr); 1512 1513 mumps->id.perm_in = perm_in; 1514 for (i=0; i<M; i++) perm_in[i] = idx[i]+1; /* perm_in[]: start from 1, not 0! */ 1515 ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr); 1516 } 1517 } 1518 } 1519 break; 1520 case 3: /* distributed assembled matrix input (size>1) */ 1521 mumps->id.nz_loc = mumps->nz; 1522 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1523 if (mumps->id.ICNTL(6)>1) { 1524 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1525 } 1526 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1527 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1528 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1529 ierr = VecDestroy(&b);CHKERRQ(ierr); 1530 break; 1531 } 1532 PetscMUMPS_c(mumps); 1533 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1534 1535 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1536 F->ops->solve = MatSolve_MUMPS; 1537 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1538 F->ops->matsolve = MatMatSolve_MUMPS; 1539 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1540 PetscFunctionReturn(0); 1541 } 1542 1543 /* Note the Petsc r and c permutations are ignored */ 1544 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1545 { 1546 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1547 PetscErrorCode ierr; 1548 Vec b; 1549 const PetscInt M = A->rmap->N; 1550 1551 PetscFunctionBegin; 1552 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1553 1554 /* Set MUMPS options from the options database */ 1555 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1556 1557 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1558 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1559 1560 /* analysis phase */ 1561 /*----------------*/ 1562 mumps->id.job = JOB_FACTSYMBOLIC; 1563 mumps->id.n = M; 1564 switch (mumps->id.ICNTL(18)) { 1565 case 0: /* centralized assembled matrix input */ 1566 if (!mumps->myid) { 1567 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1568 if (mumps->id.ICNTL(6)>1) { 1569 mumps->id.a = (MumpsScalar*)mumps->val; 1570 } 1571 } 1572 break; 1573 case 3: /* distributed assembled matrix input (size>1) */ 1574 mumps->id.nz_loc = mumps->nz; 1575 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1576 if (mumps->id.ICNTL(6)>1) { 1577 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1578 } 1579 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1580 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1581 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1582 ierr = VecDestroy(&b);CHKERRQ(ierr); 1583 break; 1584 } 1585 PetscMUMPS_c(mumps); 1586 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1587 1588 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1589 F->ops->solve = MatSolve_MUMPS; 1590 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1591 PetscFunctionReturn(0); 1592 } 1593 1594 /* Note the Petsc r permutation and factor info are ignored */ 1595 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info) 1596 { 1597 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1598 PetscErrorCode ierr; 1599 Vec b; 1600 const PetscInt M = A->rmap->N; 1601 1602 PetscFunctionBegin; 1603 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1604 1605 /* Set MUMPS options from the options database */ 1606 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1607 1608 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1609 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1610 1611 /* analysis phase */ 1612 /*----------------*/ 1613 mumps->id.job = JOB_FACTSYMBOLIC; 1614 mumps->id.n = M; 1615 switch (mumps->id.ICNTL(18)) { 1616 case 0: /* centralized assembled matrix input */ 1617 if (!mumps->myid) { 1618 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1619 if (mumps->id.ICNTL(6)>1) { 1620 mumps->id.a = (MumpsScalar*)mumps->val; 1621 } 1622 } 1623 break; 1624 case 3: /* distributed assembled matrix input (size>1) */ 1625 mumps->id.nz_loc = mumps->nz; 1626 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1627 if (mumps->id.ICNTL(6)>1) { 1628 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1629 } 1630 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1631 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1632 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1633 ierr = VecDestroy(&b);CHKERRQ(ierr); 1634 break; 1635 } 1636 PetscMUMPS_c(mumps); 1637 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1638 1639 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 1640 F->ops->solve = MatSolve_MUMPS; 1641 F->ops->solvetranspose = MatSolve_MUMPS; 1642 F->ops->matsolve = MatMatSolve_MUMPS; 1643 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1644 #if defined(PETSC_USE_COMPLEX) 1645 F->ops->getinertia = NULL; 1646 #else 1647 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 1648 #endif 1649 PetscFunctionReturn(0); 1650 } 1651 1652 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) 1653 { 1654 PetscErrorCode ierr; 1655 PetscBool iascii; 1656 PetscViewerFormat format; 1657 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1658 1659 PetscFunctionBegin; 1660 /* check if matrix is mumps type */ 1661 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0); 1662 1663 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1664 if (iascii) { 1665 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1666 if (format == PETSC_VIEWER_ASCII_INFO) { 1667 ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr); 1668 ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",mumps->id.sym);CHKERRQ(ierr); 1669 ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",mumps->id.par);CHKERRQ(ierr); 1670 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d \n",mumps->id.ICNTL(1));CHKERRQ(ierr); 1671 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d \n",mumps->id.ICNTL(2));CHKERRQ(ierr); 1672 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d \n",mumps->id.ICNTL(3));CHKERRQ(ierr); 1673 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",mumps->id.ICNTL(4));CHKERRQ(ierr); 1674 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",mumps->id.ICNTL(5));CHKERRQ(ierr); 1675 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",mumps->id.ICNTL(6));CHKERRQ(ierr); 1676 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d \n",mumps->id.ICNTL(7));CHKERRQ(ierr); 1677 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d \n",mumps->id.ICNTL(8));CHKERRQ(ierr); 1678 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",mumps->id.ICNTL(10));CHKERRQ(ierr); 1679 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",mumps->id.ICNTL(11));CHKERRQ(ierr); 1680 if (mumps->id.ICNTL(11)>0) { 1681 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr); 1682 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr); 1683 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6));CHKERRQ(ierr); 1684 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr); 1685 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr); 1686 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr); 1687 } 1688 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",mumps->id.ICNTL(12));CHKERRQ(ierr); 1689 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (efficiency control): %d \n",mumps->id.ICNTL(13));CHKERRQ(ierr); 1690 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",mumps->id.ICNTL(14));CHKERRQ(ierr); 1691 /* ICNTL(15-17) not used */ 1692 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",mumps->id.ICNTL(18));CHKERRQ(ierr); 1693 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d \n",mumps->id.ICNTL(19));CHKERRQ(ierr); 1694 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(20) (rhs sparse pattern): %d \n",mumps->id.ICNTL(20));CHKERRQ(ierr); 1695 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d \n",mumps->id.ICNTL(21));CHKERRQ(ierr); 1696 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d \n",mumps->id.ICNTL(22));CHKERRQ(ierr); 1697 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory can be allocated locally):%d \n",mumps->id.ICNTL(23));CHKERRQ(ierr); 1698 1699 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d \n",mumps->id.ICNTL(24));CHKERRQ(ierr); 1700 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d \n",mumps->id.ICNTL(25));CHKERRQ(ierr); 1701 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for rhs or solution): %d \n",mumps->id.ICNTL(26));CHKERRQ(ierr); 1702 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(27) (experimental parameter): %d \n",mumps->id.ICNTL(27));CHKERRQ(ierr); 1703 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d \n",mumps->id.ICNTL(28));CHKERRQ(ierr); 1704 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d \n",mumps->id.ICNTL(29));CHKERRQ(ierr); 1705 1706 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d \n",mumps->id.ICNTL(30));CHKERRQ(ierr); 1707 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(31) (factors is discarded in the solve phase): %d \n",mumps->id.ICNTL(31));CHKERRQ(ierr); 1708 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d \n",mumps->id.ICNTL(33));CHKERRQ(ierr); 1709 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d \n",mumps->id.ICNTL(35));CHKERRQ(ierr); 1710 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(36) (choice of BLR factorization variant): %d \n",mumps->id.ICNTL(36));CHKERRQ(ierr); 1711 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(38) (estimated compression rate of LU factors): %d \n",mumps->id.ICNTL(38));CHKERRQ(ierr); 1712 1713 ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1));CHKERRQ(ierr); 1714 ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr); 1715 ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3));CHKERRQ(ierr); 1716 ierr = PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4));CHKERRQ(ierr); 1717 ierr = PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5));CHKERRQ(ierr); 1718 ierr = PetscViewerASCIIPrintf(viewer," CNTL(7) (dropping parameter for BLR): %g \n",mumps->id.CNTL(7));CHKERRQ(ierr); 1719 1720 /* infomation local to each processor */ 1721 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr); 1722 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1723 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr); 1724 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1725 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr); 1726 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr); 1727 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1728 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr); 1729 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr); 1730 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1731 1732 ierr = PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr); 1733 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr); 1734 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1735 1736 ierr = PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr); 1737 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr); 1738 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1739 1740 ierr = PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr); 1741 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr); 1742 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1743 1744 if (mumps->ninfo && mumps->ninfo <= 80){ 1745 PetscInt i; 1746 for (i=0; i<mumps->ninfo; i++){ 1747 ierr = PetscViewerASCIIPrintf(viewer, " INFO(%d): \n",mumps->info[i]);CHKERRQ(ierr); 1748 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(mumps->info[i]));CHKERRQ(ierr); 1749 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1750 } 1751 } 1752 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1753 1754 if (!mumps->myid) { /* information from the host */ 1755 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr); 1756 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr); 1757 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr); 1758 ierr = PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr); 1759 1760 ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(3));CHKERRQ(ierr); 1761 ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(4));CHKERRQ(ierr); 1762 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",mumps->id.INFOG(5));CHKERRQ(ierr); 1763 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",mumps->id.INFOG(6));CHKERRQ(ierr); 1764 ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively use after analysis): %d \n",mumps->id.INFOG(7));CHKERRQ(ierr); 1765 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",mumps->id.INFOG(8));CHKERRQ(ierr); 1766 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",mumps->id.INFOG(9));CHKERRQ(ierr); 1767 ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d \n",mumps->id.INFOG(10));CHKERRQ(ierr); 1768 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d \n",mumps->id.INFOG(11));CHKERRQ(ierr); 1769 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",mumps->id.INFOG(12));CHKERRQ(ierr); 1770 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",mumps->id.INFOG(13));CHKERRQ(ierr); 1771 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",mumps->id.INFOG(14));CHKERRQ(ierr); 1772 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",mumps->id.INFOG(15));CHKERRQ(ierr); 1773 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",mumps->id.INFOG(16));CHKERRQ(ierr); 1774 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",mumps->id.INFOG(17));CHKERRQ(ierr); 1775 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",mumps->id.INFOG(18));CHKERRQ(ierr); 1776 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",mumps->id.INFOG(19));CHKERRQ(ierr); 1777 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",mumps->id.INFOG(20));CHKERRQ(ierr); 1778 ierr = PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",mumps->id.INFOG(21));CHKERRQ(ierr); 1779 ierr = PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",mumps->id.INFOG(22));CHKERRQ(ierr); 1780 ierr = PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",mumps->id.INFOG(23));CHKERRQ(ierr); 1781 ierr = PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",mumps->id.INFOG(24));CHKERRQ(ierr); 1782 ierr = PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",mumps->id.INFOG(25));CHKERRQ(ierr); 1783 ierr = PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));CHKERRQ(ierr); 1784 ierr = PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));CHKERRQ(ierr); 1785 ierr = PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));CHKERRQ(ierr); 1786 ierr = PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));CHKERRQ(ierr); 1787 ierr = PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));CHKERRQ(ierr); 1788 ierr = PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));CHKERRQ(ierr); 1789 ierr = PetscViewerASCIIPrintf(viewer," INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));CHKERRQ(ierr); 1790 ierr = PetscViewerASCIIPrintf(viewer," INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(36));CHKERRQ(ierr); 1791 ierr = PetscViewerASCIIPrintf(viewer," INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d \n",mumps->id.INFOG(37));CHKERRQ(ierr); 1792 ierr = PetscViewerASCIIPrintf(viewer," INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(38));CHKERRQ(ierr); 1793 ierr = PetscViewerASCIIPrintf(viewer," INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d \n",mumps->id.INFOG(39));CHKERRQ(ierr); 1794 } 1795 } 1796 } 1797 PetscFunctionReturn(0); 1798 } 1799 1800 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info) 1801 { 1802 Mat_MUMPS *mumps =(Mat_MUMPS*)A->data; 1803 1804 PetscFunctionBegin; 1805 info->block_size = 1.0; 1806 info->nz_allocated = mumps->id.INFOG(20); 1807 info->nz_used = mumps->id.INFOG(20); 1808 info->nz_unneeded = 0.0; 1809 info->assemblies = 0.0; 1810 info->mallocs = 0.0; 1811 info->memory = 0.0; 1812 info->fill_ratio_given = 0; 1813 info->fill_ratio_needed = 0; 1814 info->factor_mallocs = 0; 1815 PetscFunctionReturn(0); 1816 } 1817 1818 /* -------------------------------------------------------------------------------------------*/ 1819 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 1820 { 1821 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1822 const PetscInt *idxs; 1823 PetscInt size,i; 1824 PetscErrorCode ierr; 1825 1826 PetscFunctionBegin; 1827 ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr); 1828 if (mumps->petsc_size > 1) { 1829 PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */ 1830 1831 ls = mumps->myid ? (size ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 1832 ierr = MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);CHKERRQ(ierr); 1833 if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc\n"); 1834 } 1835 if (mumps->id.size_schur != size) { 1836 ierr = PetscFree2(mumps->id.listvar_schur,mumps->id.schur);CHKERRQ(ierr); 1837 mumps->id.size_schur = size; 1838 mumps->id.schur_lld = size; 1839 ierr = PetscMalloc2(size,&mumps->id.listvar_schur,size*size,&mumps->id.schur);CHKERRQ(ierr); 1840 } 1841 1842 /* Schur complement matrix */ 1843 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.size_schur,(PetscScalar*)mumps->id.schur,&F->schur);CHKERRQ(ierr); 1844 if (mumps->sym == 1) { 1845 ierr = MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 1846 } 1847 1848 /* MUMPS expects Fortran style indices */ 1849 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 1850 ierr = PetscArraycpy(mumps->id.listvar_schur,idxs,size);CHKERRQ(ierr); 1851 for (i=0;i<size;i++) mumps->id.listvar_schur[i]++; 1852 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 1853 if (mumps->petsc_size > 1) { 1854 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 1855 } else { 1856 if (F->factortype == MAT_FACTOR_LU) { 1857 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 1858 } else { 1859 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 1860 } 1861 } 1862 /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 1863 mumps->id.ICNTL(26) = -1; 1864 PetscFunctionReturn(0); 1865 } 1866 1867 /* -------------------------------------------------------------------------------------------*/ 1868 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S) 1869 { 1870 Mat St; 1871 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1872 PetscScalar *array; 1873 #if defined(PETSC_USE_COMPLEX) 1874 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 1875 #endif 1876 PetscErrorCode ierr; 1877 1878 PetscFunctionBegin; 1879 if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 1880 ierr = MatCreate(PETSC_COMM_SELF,&St);CHKERRQ(ierr); 1881 ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);CHKERRQ(ierr); 1882 ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr); 1883 ierr = MatSetUp(St);CHKERRQ(ierr); 1884 ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr); 1885 if (!mumps->sym) { /* MUMPS always return a full matrix */ 1886 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1887 PetscInt i,j,N=mumps->id.size_schur; 1888 for (i=0;i<N;i++) { 1889 for (j=0;j<N;j++) { 1890 #if !defined(PETSC_USE_COMPLEX) 1891 PetscScalar val = mumps->id.schur[i*N+j]; 1892 #else 1893 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 1894 #endif 1895 array[j*N+i] = val; 1896 } 1897 } 1898 } else { /* stored by columns */ 1899 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 1900 } 1901 } else { /* either full or lower-triangular (not packed) */ 1902 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 1903 PetscInt i,j,N=mumps->id.size_schur; 1904 for (i=0;i<N;i++) { 1905 for (j=i;j<N;j++) { 1906 #if !defined(PETSC_USE_COMPLEX) 1907 PetscScalar val = mumps->id.schur[i*N+j]; 1908 #else 1909 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 1910 #endif 1911 array[i*N+j] = val; 1912 array[j*N+i] = val; 1913 } 1914 } 1915 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 1916 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 1917 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 1918 PetscInt i,j,N=mumps->id.size_schur; 1919 for (i=0;i<N;i++) { 1920 for (j=0;j<i+1;j++) { 1921 #if !defined(PETSC_USE_COMPLEX) 1922 PetscScalar val = mumps->id.schur[i*N+j]; 1923 #else 1924 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 1925 #endif 1926 array[i*N+j] = val; 1927 array[j*N+i] = val; 1928 } 1929 } 1930 } 1931 } 1932 ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr); 1933 *S = St; 1934 PetscFunctionReturn(0); 1935 } 1936 1937 /* -------------------------------------------------------------------------------------------*/ 1938 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival) 1939 { 1940 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1941 1942 PetscFunctionBegin; 1943 mumps->id.ICNTL(icntl) = ival; 1944 PetscFunctionReturn(0); 1945 } 1946 1947 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival) 1948 { 1949 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1950 1951 PetscFunctionBegin; 1952 *ival = mumps->id.ICNTL(icntl); 1953 PetscFunctionReturn(0); 1954 } 1955 1956 /*@ 1957 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 1958 1959 Logically Collective on Mat 1960 1961 Input Parameters: 1962 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 1963 . icntl - index of MUMPS parameter array ICNTL() 1964 - ival - value of MUMPS ICNTL(icntl) 1965 1966 Options Database: 1967 . -mat_mumps_icntl_<icntl> <ival> 1968 1969 Level: beginner 1970 1971 References: 1972 . MUMPS Users' Guide 1973 1974 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 1975 @*/ 1976 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival) 1977 { 1978 PetscErrorCode ierr; 1979 1980 PetscFunctionBegin; 1981 PetscValidType(F,1); 1982 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 1983 PetscValidLogicalCollectiveInt(F,icntl,2); 1984 PetscValidLogicalCollectiveInt(F,ival,3); 1985 ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr); 1986 PetscFunctionReturn(0); 1987 } 1988 1989 /*@ 1990 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 1991 1992 Logically Collective on Mat 1993 1994 Input Parameters: 1995 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 1996 - icntl - index of MUMPS parameter array ICNTL() 1997 1998 Output Parameter: 1999 . ival - value of MUMPS ICNTL(icntl) 2000 2001 Level: beginner 2002 2003 References: 2004 . MUMPS Users' Guide 2005 2006 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2007 @*/ 2008 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival) 2009 { 2010 PetscErrorCode ierr; 2011 2012 PetscFunctionBegin; 2013 PetscValidType(F,1); 2014 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2015 PetscValidLogicalCollectiveInt(F,icntl,2); 2016 PetscValidIntPointer(ival,3); 2017 ierr = PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2018 PetscFunctionReturn(0); 2019 } 2020 2021 /* -------------------------------------------------------------------------------------------*/ 2022 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val) 2023 { 2024 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2025 2026 PetscFunctionBegin; 2027 mumps->id.CNTL(icntl) = val; 2028 PetscFunctionReturn(0); 2029 } 2030 2031 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val) 2032 { 2033 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2034 2035 PetscFunctionBegin; 2036 *val = mumps->id.CNTL(icntl); 2037 PetscFunctionReturn(0); 2038 } 2039 2040 /*@ 2041 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2042 2043 Logically Collective on Mat 2044 2045 Input Parameters: 2046 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2047 . icntl - index of MUMPS parameter array CNTL() 2048 - val - value of MUMPS CNTL(icntl) 2049 2050 Options Database: 2051 . -mat_mumps_cntl_<icntl> <val> 2052 2053 Level: beginner 2054 2055 References: 2056 . MUMPS Users' Guide 2057 2058 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2059 @*/ 2060 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val) 2061 { 2062 PetscErrorCode ierr; 2063 2064 PetscFunctionBegin; 2065 PetscValidType(F,1); 2066 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2067 PetscValidLogicalCollectiveInt(F,icntl,2); 2068 PetscValidLogicalCollectiveReal(F,val,3); 2069 ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr); 2070 PetscFunctionReturn(0); 2071 } 2072 2073 /*@ 2074 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2075 2076 Logically Collective on Mat 2077 2078 Input Parameters: 2079 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2080 - icntl - index of MUMPS parameter array CNTL() 2081 2082 Output Parameter: 2083 . val - value of MUMPS CNTL(icntl) 2084 2085 Level: beginner 2086 2087 References: 2088 . MUMPS Users' Guide 2089 2090 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2091 @*/ 2092 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val) 2093 { 2094 PetscErrorCode ierr; 2095 2096 PetscFunctionBegin; 2097 PetscValidType(F,1); 2098 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2099 PetscValidLogicalCollectiveInt(F,icntl,2); 2100 PetscValidRealPointer(val,3); 2101 ierr = PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2102 PetscFunctionReturn(0); 2103 } 2104 2105 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info) 2106 { 2107 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2108 2109 PetscFunctionBegin; 2110 *info = mumps->id.INFO(icntl); 2111 PetscFunctionReturn(0); 2112 } 2113 2114 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog) 2115 { 2116 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2117 2118 PetscFunctionBegin; 2119 *infog = mumps->id.INFOG(icntl); 2120 PetscFunctionReturn(0); 2121 } 2122 2123 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo) 2124 { 2125 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2126 2127 PetscFunctionBegin; 2128 *rinfo = mumps->id.RINFO(icntl); 2129 PetscFunctionReturn(0); 2130 } 2131 2132 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog) 2133 { 2134 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2135 2136 PetscFunctionBegin; 2137 *rinfog = mumps->id.RINFOG(icntl); 2138 PetscFunctionReturn(0); 2139 } 2140 2141 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS) 2142 { 2143 PetscErrorCode ierr; 2144 Mat Bt = NULL,Btseq = NULL; 2145 PetscBool flg; 2146 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2147 PetscScalar *aa; 2148 PetscInt spnr,*ia,*ja; 2149 2150 PetscFunctionBegin; 2151 PetscValidIntPointer(spRHS,2); 2152 ierr = PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);CHKERRQ(ierr); 2153 if (flg) { 2154 ierr = MatTransposeGetMat(spRHS,&Bt);CHKERRQ(ierr); 2155 } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be type MATTRANSPOSEMAT matrix"); 2156 2157 ierr = MatMumpsSetIcntl(F,30,1);CHKERRQ(ierr); 2158 2159 if (mumps->petsc_size > 1) { 2160 Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data; 2161 Btseq = b->A; 2162 } else { 2163 Btseq = Bt; 2164 } 2165 2166 if (!mumps->myid) { 2167 ierr = MatSeqAIJGetArray(Btseq,&aa);CHKERRQ(ierr); 2168 ierr = MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2169 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2170 2171 mumps->id.irhs_ptr = ia; 2172 mumps->id.irhs_sparse = ja; 2173 mumps->id.nz_rhs = ia[spnr] - 1; 2174 mumps->id.rhs_sparse = (MumpsScalar*)aa; 2175 } else { 2176 mumps->id.irhs_ptr = NULL; 2177 mumps->id.irhs_sparse = NULL; 2178 mumps->id.nz_rhs = 0; 2179 mumps->id.rhs_sparse = NULL; 2180 } 2181 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2182 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2183 2184 /* solve phase */ 2185 /*-------------*/ 2186 mumps->id.job = JOB_SOLVE; 2187 PetscMUMPS_c(mumps); 2188 if (mumps->id.INFOG(1) < 0) 2189 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 2190 2191 if (!mumps->myid) { 2192 ierr = MatSeqAIJRestoreArray(Btseq,&aa);CHKERRQ(ierr); 2193 ierr = MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2194 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2195 } 2196 PetscFunctionReturn(0); 2197 } 2198 2199 /*@ 2200 MatMumpsGetInverse - Get user-specified set of entries in inverse of A 2201 2202 Logically Collective on Mat 2203 2204 Input Parameters: 2205 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2206 - spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0] 2207 2208 Output Parameter: 2209 . spRHS - requested entries of inverse of A 2210 2211 Level: beginner 2212 2213 References: 2214 . MUMPS Users' Guide 2215 2216 .seealso: MatGetFactor(), MatCreateTranspose() 2217 @*/ 2218 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS) 2219 { 2220 PetscErrorCode ierr; 2221 2222 PetscFunctionBegin; 2223 PetscValidType(F,1); 2224 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2225 ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr); 2226 PetscFunctionReturn(0); 2227 } 2228 2229 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST) 2230 { 2231 PetscErrorCode ierr; 2232 Mat spRHS; 2233 2234 PetscFunctionBegin; 2235 ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr); 2236 ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr); 2237 ierr = MatDestroy(&spRHS);CHKERRQ(ierr); 2238 PetscFunctionReturn(0); 2239 } 2240 2241 /*@ 2242 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T 2243 2244 Logically Collective on Mat 2245 2246 Input Parameters: 2247 + F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface 2248 - spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0] 2249 2250 Output Parameter: 2251 . spRHST - requested entries of inverse of A^T 2252 2253 Level: beginner 2254 2255 References: 2256 . MUMPS Users' Guide 2257 2258 .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse() 2259 @*/ 2260 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST) 2261 { 2262 PetscErrorCode ierr; 2263 PetscBool flg; 2264 2265 PetscFunctionBegin; 2266 PetscValidType(F,1); 2267 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2268 ierr = PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 2269 if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix"); 2270 2271 ierr = PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));CHKERRQ(ierr); 2272 PetscFunctionReturn(0); 2273 } 2274 2275 /*@ 2276 MatMumpsGetInfo - Get MUMPS parameter INFO() 2277 2278 Logically Collective on Mat 2279 2280 Input Parameters: 2281 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2282 - icntl - index of MUMPS parameter array INFO() 2283 2284 Output Parameter: 2285 . ival - value of MUMPS INFO(icntl) 2286 2287 Level: beginner 2288 2289 References: 2290 . MUMPS Users' Guide 2291 2292 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2293 @*/ 2294 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival) 2295 { 2296 PetscErrorCode ierr; 2297 2298 PetscFunctionBegin; 2299 PetscValidType(F,1); 2300 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2301 PetscValidIntPointer(ival,3); 2302 ierr = PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2303 PetscFunctionReturn(0); 2304 } 2305 2306 /*@ 2307 MatMumpsGetInfog - Get MUMPS parameter INFOG() 2308 2309 Logically Collective on Mat 2310 2311 Input Parameters: 2312 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2313 - icntl - index of MUMPS parameter array INFOG() 2314 2315 Output Parameter: 2316 . ival - value of MUMPS INFOG(icntl) 2317 2318 Level: beginner 2319 2320 References: 2321 . MUMPS Users' Guide 2322 2323 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2324 @*/ 2325 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival) 2326 { 2327 PetscErrorCode ierr; 2328 2329 PetscFunctionBegin; 2330 PetscValidType(F,1); 2331 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2332 PetscValidIntPointer(ival,3); 2333 ierr = PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2334 PetscFunctionReturn(0); 2335 } 2336 2337 /*@ 2338 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 2339 2340 Logically Collective on Mat 2341 2342 Input Parameters: 2343 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2344 - icntl - index of MUMPS parameter array RINFO() 2345 2346 Output Parameter: 2347 . val - value of MUMPS RINFO(icntl) 2348 2349 Level: beginner 2350 2351 References: 2352 . MUMPS Users' Guide 2353 2354 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2355 @*/ 2356 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val) 2357 { 2358 PetscErrorCode ierr; 2359 2360 PetscFunctionBegin; 2361 PetscValidType(F,1); 2362 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2363 PetscValidRealPointer(val,3); 2364 ierr = PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2365 PetscFunctionReturn(0); 2366 } 2367 2368 /*@ 2369 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 2370 2371 Logically Collective on Mat 2372 2373 Input Parameters: 2374 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2375 - icntl - index of MUMPS parameter array RINFOG() 2376 2377 Output Parameter: 2378 . val - value of MUMPS RINFOG(icntl) 2379 2380 Level: beginner 2381 2382 References: 2383 . MUMPS Users' Guide 2384 2385 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2386 @*/ 2387 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val) 2388 { 2389 PetscErrorCode ierr; 2390 2391 PetscFunctionBegin; 2392 PetscValidType(F,1); 2393 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2394 PetscValidRealPointer(val,3); 2395 ierr = PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2396 PetscFunctionReturn(0); 2397 } 2398 2399 /*MC 2400 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 2401 distributed and sequential matrices via the external package MUMPS. 2402 2403 Works with MATAIJ and MATSBAIJ matrices 2404 2405 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 2406 2407 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below. 2408 2409 Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver 2410 2411 Options Database Keys: 2412 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 2413 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 2414 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 2415 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 2416 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 2417 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis 2418 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 2419 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 2420 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 2421 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 2422 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 2423 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 2424 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 2425 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 2426 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can allocate per processor 2427 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 2428 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 2429 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix 2430 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering 2431 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 2432 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 2433 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 2434 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 2435 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 2436 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 2437 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 2438 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 2439 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 2440 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 2441 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 2442 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 2443 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 2444 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 2445 Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 2446 2447 Level: beginner 2448 2449 Notes: 2450 When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling 2451 $ KSPGetPC(ksp,&pc); 2452 $ PCFactorGetMatrix(pc,&mat); 2453 $ MatMumpsGetInfo(mat,....); 2454 $ MatMumpsGetInfog(mat,....); etc. 2455 Or you can run with -ksp_error_if_not_converged and the program will be stopped and the information printed in the error message. 2456 2457 If you want to run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still want to run the non-MUMPS part 2458 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc 2459 (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 2460 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or open sourced 2461 OpenBLAS (PETSc has configure options to install/specify them). With these conditions met, you can run your program as before but with 2462 an extra option -mat_mumps_use_omp_threads [m]. It works as if we set OMP_NUM_THREADS=m to MUMPS, with m defaults to the number of cores 2463 per CPU socket (or package, in hwloc term), or number of PETSc MPI processes on a node, whichever is smaller. 2464 2465 By flat-MPI or pure-MPI mode, it means you run your code with as many MPI ranks as the number of cores. For example, 2466 if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test". To run MPI+OpenMP hybrid MUMPS, 2467 the tranditional way is to set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 2468 threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". The problem of this approach is that the non-MUMPS 2469 part of your code is run with fewer cores and CPUs are wasted. "-mat_mumps_use_omp_threads [m]" provides an alternative such that 2470 you can stil run your code with as many MPI ranks as the number of cores, but have MUMPS run in MPI+OpenMP hybrid mode. In our example, 2471 you can use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16". 2472 2473 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type to get MPI 2474 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 2475 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 2476 are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set 2477 by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs. 2478 In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets, 2479 if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind 2480 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half 2481 cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 2482 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 2483 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbsoe -m block:block to map consecutive MPI ranks to sockets and 2484 examine the mapping result. 2485 2486 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts, 2487 for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc 2488 calls omp_set_num_threads(m) internally before calling MUMPS. 2489 2490 References: 2491 + 1. - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 2492 - 2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017. 2493 2494 .seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCGetFactor(), PCFactorGetMatrix() 2495 2496 M*/ 2497 2498 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type) 2499 { 2500 PetscFunctionBegin; 2501 *type = MATSOLVERMUMPS; 2502 PetscFunctionReturn(0); 2503 } 2504 2505 /* MatGetFactor for Seq and MPI AIJ matrices */ 2506 static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F) 2507 { 2508 Mat B; 2509 PetscErrorCode ierr; 2510 Mat_MUMPS *mumps; 2511 PetscBool isSeqAIJ; 2512 2513 PetscFunctionBegin; 2514 /* Create the factorization matrix */ 2515 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr); 2516 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2517 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2518 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2519 ierr = MatSetUp(B);CHKERRQ(ierr); 2520 2521 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2522 2523 B->ops->view = MatView_MUMPS; 2524 B->ops->getinfo = MatGetInfo_MUMPS; 2525 2526 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2527 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2528 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2529 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2530 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2531 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2532 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2533 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2534 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2535 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2536 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2537 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2538 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2539 2540 if (ftype == MAT_FACTOR_LU) { 2541 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 2542 B->factortype = MAT_FACTOR_LU; 2543 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 2544 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 2545 mumps->sym = 0; 2546 } else { 2547 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 2548 B->factortype = MAT_FACTOR_CHOLESKY; 2549 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 2550 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 2551 #if defined(PETSC_USE_COMPLEX) 2552 mumps->sym = 2; 2553 #else 2554 if (A->spd_set && A->spd) mumps->sym = 1; 2555 else mumps->sym = 2; 2556 #endif 2557 } 2558 2559 /* set solvertype */ 2560 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2561 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2562 2563 B->ops->destroy = MatDestroy_MUMPS; 2564 B->data = (void*)mumps; 2565 2566 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2567 2568 *F = B; 2569 PetscFunctionReturn(0); 2570 } 2571 2572 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 2573 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F) 2574 { 2575 Mat B; 2576 PetscErrorCode ierr; 2577 Mat_MUMPS *mumps; 2578 PetscBool isSeqSBAIJ; 2579 2580 PetscFunctionBegin; 2581 if (ftype != MAT_FACTOR_CHOLESKY) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Cannot use PETSc SBAIJ matrices with MUMPS LU, use AIJ matrix"); 2582 if (A->rmap->bs > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Cannot use PETSc SBAIJ matrices with block size > 1 with MUMPS Cholesky, use AIJ matrix instead"); 2583 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr); 2584 /* Create the factorization matrix */ 2585 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2586 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2587 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2588 ierr = MatSetUp(B);CHKERRQ(ierr); 2589 2590 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2591 if (isSeqSBAIJ) { 2592 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 2593 } else { 2594 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 2595 } 2596 2597 B->ops->getinfo = MatGetInfo_External; 2598 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 2599 B->ops->view = MatView_MUMPS; 2600 2601 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2602 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2603 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2604 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2605 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2606 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2607 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2608 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2609 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2610 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2611 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2612 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2613 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2614 2615 B->factortype = MAT_FACTOR_CHOLESKY; 2616 #if defined(PETSC_USE_COMPLEX) 2617 mumps->sym = 2; 2618 #else 2619 if (A->spd_set && A->spd) mumps->sym = 1; 2620 else mumps->sym = 2; 2621 #endif 2622 2623 /* set solvertype */ 2624 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2625 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2626 2627 B->ops->destroy = MatDestroy_MUMPS; 2628 B->data = (void*)mumps; 2629 2630 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2631 2632 *F = B; 2633 PetscFunctionReturn(0); 2634 } 2635 2636 static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F) 2637 { 2638 Mat B; 2639 PetscErrorCode ierr; 2640 Mat_MUMPS *mumps; 2641 PetscBool isSeqBAIJ; 2642 2643 PetscFunctionBegin; 2644 /* Create the factorization matrix */ 2645 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr); 2646 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2647 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2648 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2649 ierr = MatSetUp(B);CHKERRQ(ierr); 2650 2651 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2652 if (ftype == MAT_FACTOR_LU) { 2653 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 2654 B->factortype = MAT_FACTOR_LU; 2655 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 2656 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 2657 mumps->sym = 0; 2658 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead\n"); 2659 2660 B->ops->getinfo = MatGetInfo_External; 2661 B->ops->view = MatView_MUMPS; 2662 2663 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2664 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2665 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2666 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2667 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2668 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2669 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2670 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2671 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2672 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2673 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2674 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2675 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2676 2677 /* set solvertype */ 2678 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2679 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2680 2681 B->ops->destroy = MatDestroy_MUMPS; 2682 B->data = (void*)mumps; 2683 2684 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2685 2686 *F = B; 2687 PetscFunctionReturn(0); 2688 } 2689 2690 /* MatGetFactor for Seq and MPI SELL matrices */ 2691 static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F) 2692 { 2693 Mat B; 2694 PetscErrorCode ierr; 2695 Mat_MUMPS *mumps; 2696 PetscBool isSeqSELL; 2697 2698 PetscFunctionBegin; 2699 /* Create the factorization matrix */ 2700 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);CHKERRQ(ierr); 2701 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2702 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2703 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2704 ierr = MatSetUp(B);CHKERRQ(ierr); 2705 2706 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2707 2708 B->ops->view = MatView_MUMPS; 2709 B->ops->getinfo = MatGetInfo_MUMPS; 2710 2711 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2712 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2713 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2714 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2715 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2716 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2717 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2718 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2719 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2720 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2721 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2722 2723 if (ftype == MAT_FACTOR_LU) { 2724 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 2725 B->factortype = MAT_FACTOR_LU; 2726 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 2727 else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 2728 mumps->sym = 0; 2729 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 2730 2731 /* set solvertype */ 2732 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2733 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2734 2735 B->ops->destroy = MatDestroy_MUMPS; 2736 B->data = (void*)mumps; 2737 2738 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2739 2740 *F = B; 2741 PetscFunctionReturn(0); 2742 } 2743 2744 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 2745 { 2746 PetscErrorCode ierr; 2747 2748 PetscFunctionBegin; 2749 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2750 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2751 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2752 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2753 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 2754 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2755 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2756 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2757 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2758 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 2759 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);CHKERRQ(ierr); 2760 PetscFunctionReturn(0); 2761 } 2762 2763