/*
    Provides an interface to the MUMPS sparse solver
*/

#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sell/mpi/mpisell.h>

EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#include <cmumps_c.h>
#else
#include <zmumps_c.h>
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#include <smumps_c.h>
#else
#include <dmumps_c.h>
#endif
#endif
EXTERN_C_END
#define JOB_INIT -1
#define JOB_FACTSYMBOLIC 1
#define JOB_FACTNUMERIC 2
#define JOB_SOLVE 3
#define JOB_END -2

/* calls to MUMPS */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c cmumps_c
#else
#define MUMPS_c zmumps_c
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c smumps_c
#else
#define MUMPS_c dmumps_c
#endif
#endif

/* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
#define PetscMUMPS_c(mumps) \
  do { \
    if (mumps->use_petsc_omp_support) { \
      if (mumps->is_omp_master) { \
        ierr = PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl);CHKERRQ(ierr); \
        MUMPS_c(&mumps->id); \
        ierr = PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl);CHKERRQ(ierr); \
      } \
      ierr = PetscOmpCtrlBarrier(mumps->omp_ctrl);CHKERRQ(ierr); \
    } else { \
      MUMPS_c(&mumps->id); \
    } \
  } while(0)
#else
#define PetscMUMPS_c(mumps) \
  do { MUMPS_c(&mumps->id); } while (0)
#endif

/* declare MumpsScalar */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MumpsScalar mumps_complex
#else
#define MumpsScalar mumps_double_complex
#endif
#else
#define MumpsScalar PetscScalar
#endif

/* macros s.t. indices match MUMPS documentation */
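/* For example, mumps->id.ICNTL(7) expands to mumps->id.icntl[6], i.e. the entry the
   MUMPS manual calls ICNTL(7); the same 1-based convention applies to the other macros below */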
#define ICNTL(I) icntl[(I)-1]
#define CNTL(I) cntl[(I)-1]
#define INFOG(I) infog[(I)-1]
#define INFO(I) info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I) rinfo[(I)-1]

typedef struct {
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;
#else
  ZMUMPS_STRUC_C id;
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
#else
  DMUMPS_STRUC_C id;
#endif
#endif

  MatStructure matstruc;
  PetscMPIInt  myid,petsc_size;
  PetscInt     *irn,*jcn,nz,sym;
  PetscScalar  *val;
  MPI_Comm     mumps_comm;
  PetscInt     ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter   scat_rhs, scat_sol; /* used by MatSolve() */
  Vec          b_seq,x_seq;
  PetscInt     ninfo,*info;        /* display INFO */
  PetscInt     sizeredrhs;
  PetscScalar  *schur_sol;
  PetscInt     schur_sizesol;

  PetscBool    use_petsc_omp_support;
  PetscOmpCtrl omp_ctrl;            /* an OpenMP controller with which blocked processes release their CPUs (MPI_Barrier does not have this guarantee) */
  MPI_Comm     petsc_comm,omp_comm; /* petsc_comm is the PETSc matrix's comm */
  PetscMPIInt  mpinz;               /* on the master rank, nz = sum(mpinz) over omp_comm; on other ranks, mpinz = nz */
  PetscMPIInt  omp_comm_size;
  PetscBool    is_omp_master;       /* is this rank the master of omp_comm? */
  PetscMPIInt  *recvcount,*displs;

  PetscErrorCode (*ConvertToTriples)(Mat, int, MatReuse, int*, int**, int**, PetscScalar**);
} Mat_MUMPS;

extern PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*);

static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscFree2(mumps->id.listvar_schur,mumps->id.schur);CHKERRQ(ierr);
  ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
  ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);

  mumps->id.size_schur = 0;
  mumps->id.schur_lld  = 0;
  mumps->id.ICNTL(19)  = 0;
  PetscFunctionReturn(0);
}

/* solve with rhs in mumps->id.redrhs and return in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS            *mumps=(Mat_MUMPS*)F->data;
  Mat                  S,B,X;
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;
  PetscErrorCode       ierr;

  PetscFunctionBegin;
  ierr = MatFactorFactorizeSchurComplement(F);CHKERRQ(ierr);
  ierr = MatFactorGetSchurComplement(F,&S,&schurstatus);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);CHKERRQ(ierr);
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);CHKERRQ(ierr);
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatMatSolveTranspose(S,B,X);CHKERRQ(ierr);
    } else {
      ierr = MatMatSolve(S,B,X);CHKERRQ(ierr);
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    sizesol = mumps->id.nrhs*mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
      ierr = PetscMalloc1(sizesol,&mumps->schur_sol);CHKERRQ(ierr);
      mumps->schur_sizesol = sizesol;
    }
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);CHKERRQ(ierr);
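    /* in the MAT_FACTOR_SCHUR_INVERTED case the object S holds the explicitly inverted Schur
       complement, so the "solve" below reduces to a dense multiplication into the work array X */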
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatTransposeMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr);
    } else {
      ierr = MatMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr);
    }
    ierr = MatCopy(X,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %D",F->schur_status);
    break;
  }
  ierr = MatFactorRestoreSchurComplement(F,&S,schurstatus);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = MatDestroy(&X);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)F->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when the Schur complement has not been computed */
    PetscFunctionReturn(0);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = mumps->id.size_schur;
      ierr = PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);CHKERRQ(ierr);
      mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs;
    }
    mumps->id.ICNTL(26) = 1; /* condensation phase */
  } else { /* prepare for the expansion step */
    /* solve the Schur complement (this has to be done by the MUMPS user, so basically us) */
    ierr = MatMumpsSolveSchur_Private(F);CHKERRQ(ierr);
    mumps->id.ICNTL(26) = 2; /* expansion phase */
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free the MUMPS internal array for redrhs if we have solved for multiple rhs, in order to save memory */
    if (mumps->id.nrhs > 1) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = 0;
      mumps->sizeredrhs = 0;
    }
  }
  PetscFunctionReturn(0);
}

/*
  MatConvertToTriples_A_B - convert a PETSc matrix to triples: row[nz], col[nz], val[nz]

  input:
    A     - matrix in aij, baij or sbaij (bs=1) format
    shift - 0: C style output triples; 1: Fortran style output triples.
    reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triples
            MAT_REUSE_MATRIX:   only the values in the v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and col indices, matrix values (matrix triples)

  The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() they are
  freed with PetscFree(mumps->irn). This is not ideal code: the fact that v is ONLY sometimes part of mumps->irn means
  that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
*/

PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscInt *ai,*aj,*ajj,M=A->rmap->n;
  PetscInt       nz,rnz,i,j;
  PetscErrorCode ierr;
  PetscInt       *row,*col;
  Mat_SeqAIJ     *aa=(Mat_SeqAIJ*)A->data;

  PetscFunctionBegin;
  *v = aa->a;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz;
    ai   = aa->i;
    aj   = aa->j;
    *nnz = nz;
    ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr);
    col  = row + nz;

    nz = 0;
    for (i=0; i<M; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        row[nz] = i+shift; col[nz++] = ajj[j] + shift;
      }
    }
    *r = row; *c = col;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
  PetscInt    *ptr;

  PetscFunctionBegin;
  *v = a->val;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscInt       nz,i,j,row;
    PetscErrorCode ierr;

    nz   = a->sliidx[a->totalslices];
    *nnz = nz;
    ierr = PetscMalloc1(2*nz, &ptr);CHKERRQ(ierr);
    *r   = ptr;
    *c   = ptr + nz;

    for (i=0; i<a->totalslices; i++) {
      for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
        *ptr++ = 8*i + row + shift;
      }
    }
    for (i=0;i<nz;i++) *ptr++ = a->colidx[i] + shift;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  Mat_SeqBAIJ    *aa=(Mat_SeqBAIJ*)A->data;
  const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2;
  PetscInt       bs,M,nz,idx=0,rnz,i,j,k,m;
  PetscErrorCode ierr;
  PetscInt       *row,*col;

  PetscFunctionBegin;
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  M    = A->rmap->N/bs;
  *v   = aa->a;
  if (reuse == MAT_INITIAL_MATRIX) {
    ai   = aa->i; aj = aa->j;
    nz   = bs2*aa->nz;
    *nnz = nz;
    ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr);
    col  = row + nz;

    for (i=0; i<M; i++) {
      ajj = aj + ai[i];
      rnz = ai[i+1] - ai[i];
      for (k=0; k<rnz; k++) {
        for (j=0; j<bs; j++) {
          for (m=0; m<bs; m++) {
            row[idx]   = i*bs + m + shift;
            col[idx++] = bs*(ajj[k]) + j + shift;
          }
        }
      }
    }
    *r = row; *c = col;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscInt *ai, *aj,*ajj,M=A->rmap->n;
  PetscInt       nz,rnz,i,j;
  PetscErrorCode ierr;
  PetscInt       *row,*col;
  Mat_SeqSBAIJ   *aa=(Mat_SeqSBAIJ*)A->data;

  PetscFunctionBegin;
  *v = aa->a;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz;
    ai   = aa->i;
    aj   = aa->j;
    *nnz = nz;
    ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr);
    col  = row + nz;

    nz = 0;
    for (i=0; i<M; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        row[nz] = i+shift; col[nz++] = ajj[j] + shift;
      }
    }
    *r = row; *c = col;
  }
  PetscFunctionReturn(0);
}

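/*
   Note: the *_seqsbaij converters feed MUMPS symmetric (sym > 0) factorizations, which
   expect only one triangle of the matrix; the routine below therefore extracts only the
   upper triangular part (diagonal included) of an AIJ matrix.
*/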
PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscInt    *ai,*aj,*ajj,*adiag,M=A->rmap->n;
  PetscInt          nz,rnz,i,j;
  const PetscScalar *av,*v1;
  PetscScalar       *val;
  PetscErrorCode    ierr;
  PetscInt          *row,*col;
  Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
  PetscBool         missing;

  PetscFunctionBegin;
  ai    = aa->i; aj = aa->j; av = aa->a;
  adiag = aa->diag;
  ierr  = MatMissingDiagonal_SeqAIJ(A,&missing,&i);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    /* count nz in the upper triangular part of A */
    nz = 0;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            nz++;
          }
        } else {
          nz += ai[i+1] - adiag[i];
        }
      }
    } else {
      for (i=0; i<M; i++) nz += ai[i+1] - adiag[i];
    }
    *nnz = nz;

    ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr);
    col  = row + nz;
    val  = (PetscScalar*)(col + nz);

    nz = 0;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            row[nz] = i+shift;
            col[nz] = aj[j]+shift;
            val[nz] = av[j];
            nz++;
          }
        } else {
          rnz = ai[i+1] - adiag[i];
          ajj = aj + adiag[i];
          v1  = av + adiag[i];
          for (j=0; j<rnz; j++) {
            row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i=0; i<M; i++) {
        rnz = ai[i+1] - adiag[i];
        ajj = aj + adiag[i];
        v1  = av + adiag[i];
        for (j=0; j<rnz; j++) {
          row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j];
        }
      }
    }
    *r = row; *c = col; *v = val;
  } else {
    nz = 0; val = *v;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            val[nz++] = av[j];
          }
        } else {
          rnz = ai[i+1] - adiag[i];
          v1  = av + adiag[i];
          for (j=0; j<rnz; j++) {
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i=0; i<M; i++) {
        rnz = ai[i+1] - adiag[i];
        v1  = av + adiag[i];
        for (j=0; j<rnz; j++) {
          val[nz++] = v1[j];
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt          rstart,nz,i,j,jj,irow,countA,countB;
  PetscInt          *row,*col;
  const PetscScalar *av, *bv,*v1,*v2;
  PetscScalar       *val;
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ*)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;

  PetscFunctionBegin;
  ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap->rstart;
  av=aa->a; bv=bb->a;

  garray = mat->garray;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz + bb->nz;
    *nnz = nz;
    ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr);
    col  = row + nz;
    val  = (PetscScalar*)(col + nz);

    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j=0; j < countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt          rstart,nz,i,j,jj,irow,countA,countB;
  PetscInt          *row,*col;
  const PetscScalar *av, *bv,*v1,*v2;
  PetscScalar       *val;
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ        *aa  = (Mat_SeqAIJ*)(mat->A)->data;
  Mat_SeqAIJ        *bb  = (Mat_SeqAIJ*)(mat->B)->data;

  PetscFunctionBegin;
  ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap->rstart;
  av=aa->a; bv=bb->a;

  garray = mat->garray;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz + bb->nz;
    *nnz = nz;
    ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr);
    col  = row + nz;
    val  = (PetscScalar*)(col + nz);

    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j=0; j < countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ*)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
  const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj;
  const PetscInt    *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart;
  const PetscInt    bs2=mat->bs2;
  PetscErrorCode    ierr;
  PetscInt          bs,nz,i,j,k,n,jj,irow,countA,countB,idx;
  PetscInt          *row,*col;
  const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2;
  PetscScalar       *val;

  PetscFunctionBegin;
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = bs2*(aa->nz + bb->nz);
    *nnz = nz;
    ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr);
    col  = row + nz;
    val  = (PetscScalar*)(col + nz);

    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }

  jj = 0; irow = rstart;
  for (i=0; i<mbs; i++) {
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    ajj    = aj + ai[i];
    bjj    = bj + bi[i];
    v1     = av + bs2*ai[i];
    v2     = bv + bs2*bi[i];

    idx = 0;
    /* A-part */
    for (k=0; k<countA; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            row[jj] = irow + n + shift;
            col[jj] = rstart + bs*ajj[k] + j + shift;
          }
          val[jj++] = v1[idx++];
        }
      }
    }

    idx = 0;
    /* B-part */
    for (k=0; k<countB; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            row[jj] = irow + n + shift;
            col[jj] = bs*garray[bjj[k]] + j + shift;
          }
          val[jj++] = v2[idx++];
        }
      }
    }
    irow += bs;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscInt    *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt          rstart,nz,nza,nzb,i,j,jj,irow,countA,countB;
  PetscInt          *row,*col;
  const PetscScalar *av, *bv,*v1,*v2;
  PetscScalar       *val;
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ        *aa  = (Mat_SeqAIJ*)(mat->A)->data;
  Mat_SeqAIJ        *bb  = (Mat_SeqAIJ*)(mat->B)->data;

  PetscFunctionBegin;
  ai=aa->i; aj=aa->j; adiag=aa->diag;
  bi=bb->i; bj=bb->j; garray = mat->garray;
  av=aa->a; bv=bb->a;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
    nzb = 0; /* num of upper triangular entries in mat->B */
    for (i=0; i<m; i++) {
      nza   += (ai[i+1] - adiag[i]);
      countB = bi[i+1] - bi[i];
      bjj    = bj + bi[i];
      for (j=0; j<countB; j++) {
        if (garray[bjj[j]] > rstart) nzb++;
      }
    }

    nz   = nza + nzb; /* total nz of the upper triangular part of mat */
    *nnz = nz;
    ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr);
    col  = row + nz;
    val  = (PetscScalar*)(col + nz);

    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
    v1     = av + adiag[i];
    countA = ai[i+1] - adiag[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j=0; j < countB; j++) {
      if (garray[bjj[j]] > rstart) {
        if (reuse == MAT_INITIAL_MATRIX) {
          row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
        }
        val[jj++] = v2[j];
      }
    }
    irow++;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
  ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr);
  ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr);
  ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr);
  ierr = PetscFree(mumps->irn);CHKERRQ(ierr);
  ierr = PetscFree(mumps->info);CHKERRQ(ierr);
  ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr);
  mumps->id.job = JOB_END;
  PetscMUMPS_c(mumps);
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  if (mumps->use_petsc_omp_support) { ierr = PetscOmpCtrlDestroy(&mumps->omp_ctrl);CHKERRQ(ierr); }
#endif
  ierr = PetscFree2(mumps->recvcount,mumps->displs);CHKERRQ(ierr);
  ierr = PetscFree(A->data);CHKERRQ(ierr);

  /* clear composed functions */
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS        *mumps=(Mat_MUMPS*)A->data;
  PetscScalar      *array;
  Vec              b_seq;
  IS               is_iden,is_petsc;
  PetscErrorCode   ierr;
  PetscInt         i;
  PetscBool        second_solve = PETSC_FALSE;
  static PetscBool cite1 = PETSC_FALSE,cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1);CHKERRQ(ierr);
  ierr = PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",&cite2);CHKERRQ(ierr);

  if (A->factorerrortype) {
    ierr = PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    ierr = VecSetInf(x);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  mumps->id.ICNTL(20) = 0; /* dense RHS */
  mumps->id.nrhs      = 1;
  b_seq = mumps->b_seq;
  if (mumps->petsc_size > 1) {
    /* MUMPS only supports a centralized rhs. Scatter b into a sequential rhs vector */
    ierr = VecScatterBegin(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mumps->myid) {ierr = VecGetArray(b_seq,&array);CHKERRQ(ierr);}
  } else {  /* petsc_size == 1 */
    ierr = VecCopy(b,x);CHKERRQ(ierr);
    ierr = VecGetArray(x,&array);CHKERRQ(ierr);
  }
  if (!mumps->myid) { /* define rhs on the host */
    mumps->id.nrhs = 1;
    mumps->id.rhs  = (MumpsScalar*)array;
  }

  /*
     handle condensation step of Schur complement (if any)
     We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
     According to the MUMPS (5.0.0) manual, any value should be harmful during the factorization phase
     Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
     This requires an extra call to PetscMUMPS_c and the computation of the factors for S
  */
  if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
    if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n");
    second_solve = PETSC_TRUE;
    ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
  }
  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

  /* handle expansion step of Schur complement (if any) */
  if (second_solve) {
    ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
  }

  if (mumps->petsc_size > 1) { /* convert the MUMPS distributed solution to the PETSc mpi x */
    if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
      /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size), so recreate scat_sol */
      ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
    }
    if (!mumps->scat_sol) { /* create scatter scat_sol */
      ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */
      for (i=0; i<mumps->id.lsol_loc; i++) {
        mumps->id.isol_loc[i] -= 1; /* change Fortran style to C style */
      }
      ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,mumps->id.isol_loc,PETSC_COPY_VALUES,&is_petsc);CHKERRQ(ierr); /* to */
      ierr = VecScatterCreateWithData(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr);
      ierr = ISDestroy(&is_iden);CHKERRQ(ierr);
      ierr = ISDestroy(&is_petsc);CHKERRQ(ierr);

      mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save the current value of id.ICNTL(9) */
    }

    ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  ierr = PetscLogFlops(2.0*mumps->id.RINFO(3));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mumps->id.ICNTL(9) = 0;
  ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr);
  mumps->id.ICNTL(9) = 1;
  PetscFunctionReturn(0);
}

PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X)
{
  PetscErrorCode ierr;
  Mat            Bt = NULL;
  PetscBool      flg, flgT;
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
  PetscInt       i,nrhs,M;
  PetscScalar    *array,*bray;
  PetscInt       lsol_loc,nlsol_loc,*isol_loc,*idx,*iidx,*idxx,*isol_loc_save;
  MumpsScalar    *sol_loc,*sol_loc_save;
  IS             is_to,is_from;
  PetscInt       k,proc,j,m;
  const PetscInt *rstart;
  Vec            v_mpi,b_seq,x_seq;
  VecScatter     scat_rhs,scat_sol;
  PetscScalar    *aa;
  PetscInt       spnr,*ia,*ja;
  Mat_MPIAIJ     *b = NULL;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");

  ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (flg) { /* dense B */
    if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution");
    mumps->id.ICNTL(20) = 0; /* dense RHS */
  } else { /* sparse B */
    ierr = PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);CHKERRQ(ierr);
    if (flgT) { /* input B is the transpose of the actual RHS matrix,
                   because MUMPS requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
      ierr = MatTransposeGetMat(B,&Bt);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix");
    mumps->id.ICNTL(20) = 1; /* sparse RHS */
  }

  ierr = MatGetSize(B,&M,&nrhs);CHKERRQ(ierr);
  mumps->id.nrhs = nrhs;
  mumps->id.lrhs = M;
  mumps->id.rhs  = NULL;

  if (mumps->petsc_size == 1) {
    PetscScalar *aa;
    PetscInt    spnr,*ia,*ja;
    PetscBool   second_solve = PETSC_FALSE;

    ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
    mumps->id.rhs = (MumpsScalar*)array;

    if (!Bt) { /* dense B */
      /* copy B to X */
      ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr);
      ierr = PetscMemcpy(array,bray,M*nrhs*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr);
    } else { /* sparse B */
      ierr = MatSeqAIJGetArray(Bt,&aa);CHKERRQ(ierr);
      ierr = MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      /* MUMPS requires ia and ja to start at 1! */
      mumps->id.irhs_ptr    = ia;
      mumps->id.irhs_sparse = ja;
      mumps->id.nz_rhs      = ia[spnr] - 1;
      mumps->id.rhs_sparse  = (MumpsScalar*)aa;
    }
    /* handle condensation step of Schur complement (if any) */
    if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
      second_solve = PETSC_TRUE;
      ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
    }
    /* solve phase */
    /*-------------*/
    mumps->id.job = JOB_SOLVE;
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

    /* handle expansion step of Schur complement (if any) */
    if (second_solve) {
      ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
    }
    if (Bt) { /* sparse B */
      ierr = MatSeqAIJRestoreArray(Bt,&aa);CHKERRQ(ierr);
      ierr = MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
    ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/
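  /*
     The plan, as implemented below: centralize the right-hand side on rank 0 (a dense B is
     scattered into b_seq; a sparse rhs is taken from the host's piece of Bt), let MUMPS solve
     with a distributed solution in sol_loc/isol_loc, then scatter that distributed solution
     into the distributed dense matrix X.
  */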
  if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n");

  /* create x_seq to hold the MUMPS local solution */
  isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
  sol_loc_save  = mumps->id.sol_loc;

  lsol_loc  = mumps->id.lsol_loc;
  nlsol_loc = nrhs*lsol_loc; /* length of sol_loc */
  ierr = PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);CHKERRQ(ierr);
  mumps->id.sol_loc  = (MumpsScalar*)sol_loc;
  mumps->id.isol_loc = isol_loc;

  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&x_seq);CHKERRQ(ierr);

  /* scatter v_mpi to b_seq because MUMPS only supports a centralized rhs */
  /* idx:  maps from the k-th index of v_mpi to the (i,j)-th global entry of B;
     iidx: inverse of idx, will be used to scatter the MUMPS x_seq -> PETSc X */
  ierr = PetscMalloc1(nrhs*M,&idx);CHKERRQ(ierr);

  if (!Bt) { /* dense B */
    /* wrap the dense rhs matrix B into a vector v_mpi */
    ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr);
    ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr);
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr);

    /* scatter v_mpi to b_seq on proc[0]. MUMPS requires the rhs to be centralized on the host! */
    if (!mumps->myid) {
      ierr = MatGetOwnershipRanges(B,&rstart);CHKERRQ(ierr);
      k = 0;
      for (proc=0; proc<mumps->petsc_size; proc++){
        for (j=0; j<nrhs; j++){
          for (i=rstart[proc]; i<rstart[proc+1]; i++){
            idx[k++] = j*M + i;
          }
        }
      }

      ierr = VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);CHKERRQ(ierr);
      ierr = ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);CHKERRQ(ierr);
    } else {
      ierr = VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);CHKERRQ(ierr);
    }
    ierr = VecScatterCreateWithData(v_mpi,is_from,b_seq,is_to,&scat_rhs);CHKERRQ(ierr);
    ierr = VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = ISDestroy(&is_to);CHKERRQ(ierr);
    ierr = ISDestroy(&is_from);CHKERRQ(ierr);
    ierr = VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

    if (!mumps->myid) { /* define rhs on the host */
      ierr = VecGetArray(b_seq,&bray);CHKERRQ(ierr);
      mumps->id.rhs = (MumpsScalar*)bray;
      ierr = VecRestoreArray(b_seq,&bray);CHKERRQ(ierr);
    }

  } else { /* sparse B */
    b = (Mat_MPIAIJ*)Bt->data;

    /* wrap dense X into a vector v_mpi */
    ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr);
    ierr = MatDenseGetArray(X,&bray);CHKERRQ(ierr);
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(X,&bray);CHKERRQ(ierr);

    if (!mumps->myid) {
      ierr = MatSeqAIJGetArray(b->A,&aa);CHKERRQ(ierr);
      ierr = MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      /* MUMPS requires ia and ja to start at 1! */
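      /* MatGetRowIJ() above was called with a shift of 1, so ia/ja are already 1-based;
         because Bt holds the transpose of the rhs, its rows (CSR) are the columns of B,
         which gives MUMPS the compressed-column layout it expects */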
      mumps->id.irhs_ptr    = ia;
      mumps->id.irhs_sparse = ja;
      mumps->id.nz_rhs      = ia[spnr] - 1;
      mumps->id.rhs_sparse  = (MumpsScalar*)aa;
    } else {
      mumps->id.irhs_ptr    = NULL;
      mumps->id.irhs_sparse = NULL;
      mumps->id.nz_rhs      = 0;
      mumps->id.rhs_sparse  = NULL;
    }
  }

  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

  /* scatter the MUMPS distributed solution to the PETSc vector v_mpi, which shares local arrays with the solution matrix X */
  ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
  ierr = VecPlaceArray(v_mpi,array);CHKERRQ(ierr);

  /* create scatter scat_sol */
  ierr = MatGetOwnershipRanges(X,&rstart);CHKERRQ(ierr);
  /* iidx: inverse of the idx computed above, used to scatter the MUMPS x_seq to the PETSc X */
  iidx = idx;
  k    = 0;
  for (proc=0; proc<mumps->petsc_size; proc++){
    for (j=0; j<nrhs; j++){
      for (i=rstart[proc]; i<rstart[proc+1]; i++) iidx[j*M + i] = k++;
    }
  }

  ierr = PetscMalloc1(nlsol_loc,&idxx);CHKERRQ(ierr);
  ierr = ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);CHKERRQ(ierr);
  for (i=0; i<lsol_loc; i++) {
    isol_loc[i] -= 1; /* change Fortran style to C style */
    idxx[i] = iidx[isol_loc[i]];
    for (j=1; j<nrhs; j++){
      idxx[j*lsol_loc+i] = iidx[isol_loc[i]+j*M];
    }
  }
  ierr = ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr);
  ierr = VecScatterCreateWithData(x_seq,is_from,v_mpi,is_to,&scat_sol);CHKERRQ(ierr);
  ierr = VecScatterBegin(scat_sol,x_seq,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = ISDestroy(&is_from);CHKERRQ(ierr);
  ierr = ISDestroy(&is_to);CHKERRQ(ierr);
  ierr = VecScatterEnd(scat_sol,x_seq,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);

  /* free spaces */
  mumps->id.sol_loc  = sol_loc_save;
  mumps->id.isol_loc = isol_loc_save;

  ierr = PetscFree2(sol_loc,isol_loc);CHKERRQ(ierr);
  ierr = PetscFree(idx);CHKERRQ(ierr);
  ierr = PetscFree(idxx);CHKERRQ(ierr);
  ierr = VecDestroy(&x_seq);CHKERRQ(ierr);
  ierr = VecDestroy(&v_mpi);CHKERRQ(ierr);
  if (Bt) {
    if (!mumps->myid) {
      b = (Mat_MPIAIJ*)Bt->data;
      ierr = MatSeqAIJRestoreArray(b->A,&aa);CHKERRQ(ierr);
      ierr = MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
  } else {
    ierr = VecDestroy(&b_seq);CHKERRQ(ierr);
    ierr = VecScatterDestroy(&scat_rhs);CHKERRQ(ierr);
  }
  ierr = VecScatterDestroy(&scat_sol);CHKERRQ(ierr);
  ierr = PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X)
{
  PetscErrorCode ierr;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix");

  /* Create B=Bt^T that uses Bt's data structure */
  ierr = MatCreateTranspose(Bt,&B);CHKERRQ(ierr);
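  /* MatMatSolve_MUMPS() recognizes the MATTRANSPOSEMAT wrapper, switches MUMPS to a sparse
     rhs (ICNTL(20)=1), and hands it Bt's rows directly, so B is never formed explicitly */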
  ierr = MatMatSolve_MUMPS(A,B,X);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if !defined(PETSC_USE_COMPLEX)
/*
  input:
   F:     numeric factor
  output:
   nneg:  total number of negative pivots
   nzero: total number of zero pivots
   npos:  (global dimension of F) - nneg - nzero
*/
PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos)
{
  Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRQ(ierr);
  /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
  if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set to 1 for correct global matrix inertia\n",mumps->id.ICNTL(13));

  if (nneg) *nneg = mumps->id.INFOG(12);
  if (nzero || npos) {
    if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set to 1 for null pivot row detection");
    if (nzero) *nzero = mumps->id.INFOG(28);
    if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
  }
  PetscFunctionReturn(0);
}
#endif

PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps)
{
  PetscErrorCode ierr;
  PetscInt       i,nz=0,*irn=NULL,*jcn=NULL;
  PetscScalar    *val=NULL;
  PetscMPIInt    mpinz,*recvcount=NULL,*displs=NULL;

  PetscFunctionBegin;
  if (mumps->omp_comm_size > 1) {
    if (reuse == MAT_INITIAL_MATRIX) {
      /* master first gathers counts of nonzeros to receive */
      if (mumps->is_omp_master) { ierr = PetscMalloc2(mumps->omp_comm_size,&recvcount,mumps->omp_comm_size,&displs);CHKERRQ(ierr); }
      ierr = PetscMPIIntCast(mumps->nz,&mpinz);CHKERRQ(ierr);
      ierr = MPI_Gather(&mpinz,1,MPI_INT,recvcount,1,MPI_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr);

      /* master allocates memory to receive nonzeros */
      if (mumps->is_omp_master) {
        displs[0] = 0;
        for (i=1; i<mumps->omp_comm_size; i++) displs[i] = displs[i-1] + recvcount[i-1];
        nz   = displs[mumps->omp_comm_size-1] + recvcount[mumps->omp_comm_size-1];
        ierr = PetscMalloc(2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar),&irn);CHKERRQ(ierr);
        jcn  = irn + nz;
        val  = (PetscScalar*)(jcn + nz);
      }

      /* save the gatherv plan */
      mumps->mpinz     = mpinz; /* used as the send count */
      mumps->recvcount = recvcount;
      mumps->displs    = displs;

      /* master gathers the nonzeros */
      ierr = MPI_Gatherv(mumps->irn,mpinz,MPIU_INT,irn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr);
      ierr = MPI_Gatherv(mumps->jcn,mpinz,MPIU_INT,jcn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr);
      ierr = MPI_Gatherv(mumps->val,mpinz,MPIU_SCALAR,val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr);

      /* master frees its row/col/val and replaces them with the bigger arrays */
      if (mumps->is_omp_master) {
        ierr = PetscFree(mumps->irn);CHKERRQ(ierr); /* irn/jcn/val are allocated together so free only irn */
        mumps->nz  = nz; /* it is the sum of mpinz over omp_comm */
        mumps->irn = irn;
        mumps->jcn = jcn;
        mumps->val = val;
      }
    } else {
      ierr = MPI_Gatherv((mumps->is_omp_master?MPI_IN_PLACE:mumps->val),mumps->mpinz,MPIU_SCALAR,mumps->val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat_MUMPS      *mumps =(Mat_MUMPS*)(F)->data;
  PetscErrorCode ierr;
  PetscBool      isMPIAIJ;

  PetscFunctionBegin;
  if (mumps->id.INFOG(1) < 0) {
    if (mumps->id.INFOG(1) == -6) {
      ierr = PetscInfo2(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    }
    ierr = PetscInfo2(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr);
  ierr = MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);CHKERRQ(ierr);

  /* numerical factorization phase */
  /*-------------------------------*/
  mumps->id.job = JOB_FACTNUMERIC;
  if (!mumps->id.ICNTL(18)) { /* A is centralized */
    if (!mumps->myid) {
      mumps->id.a = (MumpsScalar*)mumps->val;
    }
  } else {
    mumps->id.a_loc = (MumpsScalar*)mumps->val;
  }
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) {
    if (A->erroriffailure) {
      SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
    } else {
      if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
        ierr = PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
      } else if (mumps->id.INFOG(1) == -13) {
        ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_OUTMEMORY;
      } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
        ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with work array\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_OUTMEMORY;
      } else {
        ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_OTHER;
      }
    }
  }
  if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d\n",mumps->id.ICNTL(16));

  F->assembled    = PETSC_TRUE;
  mumps->matstruc = SAME_NONZERO_PATTERN;
  if (F->schur) { /* reset the Schur status to unfactored */
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
      mumps->id.ICNTL(19) = 2;
      ierr = MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);CHKERRQ(ierr);
    }
    ierr = MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);CHKERRQ(ierr);
  }

  /* just to be sure that the ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
  if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;

  if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
  if (mumps->petsc_size > 1) {
    PetscInt    lsol_loc;
    PetscScalar *sol_loc;

    ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr);

    /* distributed solution; create x_seq=sol_loc for repeated use */
    if (mumps->x_seq) {
      ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
      ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr);
      ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr);
    }
    lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
    ierr = PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);CHKERRQ(ierr);
    mumps->id.lsol_loc = lsol_loc;
    mumps->id.sol_loc  = (MumpsScalar*)sol_loc;
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr);
  }
  ierr = PetscLogFlops(mumps->id.RINFO(2));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Sets MUMPS options from the options database */
PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A)
{
  Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
  PetscErrorCode ierr;
  PetscInt       icntl,info[40],i,ninfo=40;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr);
  if (flg) mumps->id.ICNTL(1) = icntl;
  ierr = PetscOptionsInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warnings","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr);
  if (flg) mumps->id.ICNTL(2) = icntl;
  ierr = PetscOptionsInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr);
  if (flg) mumps->id.ICNTL(3) = icntl;

  ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr);
  if (flg) mumps->id.ICNTL(4) = icntl;
  if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume the MUMPS default id.ICNTL(3) = 6 */

  ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr);
  if (flg) mumps->id.ICNTL(6) = icntl;

  ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr);
  if (flg) {
    if (icntl == 1 && mumps->petsc_size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"pivot order cannot be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n");
    else mumps->id.ICNTL(7) = icntl;
  }

  ierr = PetscOptionsInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr);
  /* ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL);CHKERRQ(ierr);  handled by MatSolveTranspose_MUMPS() */
  ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr);
  if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any Schur data (if any) */
    ierr = MatDestroy(&F->schur);CHKERRQ(ierr);
    ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr);
  }
  /* ierr = PetscOptionsInt("-mat_mumps_icntl_20","ICNTL(20): the format (dense or sparse) of the right-hand sides","None",mumps->id.ICNTL(20),&mumps->id.ICNTL(20),NULL);CHKERRQ(ierr);  -- sparse rhs is not supported in the PETSc API */
  /* ierr = PetscOptionsInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL);CHKERRQ(ierr);  we only use distributed solution vectors */

  ierr = PetscOptionsInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr);
  if (mumps->id.ICNTL(24)) {
    mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */
  }

  ierr = PetscOptionsInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix is present","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr);
matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr); 1339 ierr = PetscOptionsInt("-mat_mumps_icntl_27","ICNTL(27): the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr); 1340 ierr = PetscOptionsInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr); 1341 ierr = PetscOptionsInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr); 1342 /* ierr = PetscOptionsInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */ 1343 ierr = PetscOptionsInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr); 1344 /* ierr = PetscOptionsInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr); -- not supported by PETSc API */ 1345 ierr = PetscOptionsInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr); 1346 ierr = PetscOptionsInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Lock Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr); 1347 1348 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr); 1349 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr); 1350 ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr); 1351 ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr); 1352 ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr); 1353 ierr = PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);CHKERRQ(ierr); 1354 1355 ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, 256, NULL);CHKERRQ(ierr); 1356 1357 ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr); 1358 if (ninfo) { 1359 if (ninfo > 40) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %d must <= 40\n",ninfo); 1360 ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr); 1361 mumps->ninfo = ninfo; 1362 for (i=0; i<ninfo; i++) { 1363 if (info[i] < 0 || info[i]>40) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %d must between 1 and 40\n",ninfo); 1364 else mumps->info[i] = info[i]; 1365 } 1366 } 1367 1368 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1369 PetscFunctionReturn(0); 1370 } 1371 1372 PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps) 1373 { 1374 PetscErrorCode ierr; 1375 PetscInt nthreads=0; 1376 1377 
PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps)
{
  PetscErrorCode ierr;
  PetscInt       nthreads=0;

  PetscFunctionBegin;
  mumps->petsc_comm = PetscObjectComm((PetscObject)A);
  ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRQ(ierr); /* so that code like "if (!myid)" still works even if mumps_comm is different */

  ierr = PetscOptionsHasName(NULL,NULL,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr);
  if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when the user did not supply one */
  ierr = PetscOptionsGetInt(NULL,NULL,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr);
  if (mumps->use_petsc_omp_support) {
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
    ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr);
    ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr);
#else
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -mat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in the MATSOLVERMUMPS manual\n");
#endif
  } else {
    mumps->omp_comm      = PETSC_COMM_SELF;
    mumps->mumps_comm    = mumps->petsc_comm;
    mumps->is_omp_master = PETSC_TRUE;
  }
  ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRQ(ierr);

  mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
  mumps->id.job = JOB_INIT;
  mumps->id.par = 1; /* host participates in factorization and solve */
  mumps->id.sym = mumps->sym;

  PetscMUMPS_c(mumps);

  /* copy MUMPS default control values from the master to the slaves. Although slaves do not call MUMPS, they may access these values in code.
     For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
  */
1408 */ 1409 ierr = MPI_Bcast(mumps->id.icntl,40,MPIU_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 Manual Section 9 */ 1410 ierr = MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); 1411 1412 mumps->scat_rhs = NULL; 1413 mumps->scat_sol = NULL; 1414 1415 /* set PETSc-MUMPS default options - override MUMPS defaults */ 1416 mumps->id.ICNTL(3) = 0; 1417 mumps->id.ICNTL(4) = 0; 1418 if (mumps->petsc_size == 1) { 1419 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 1420 } else { 1421 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 1422 mumps->id.ICNTL(20) = 0; /* rhs is in dense format */ 1423 mumps->id.ICNTL(21) = 1; /* distributed solution */ 1424 } 1425 1426 /* schur */ 1427 mumps->id.size_schur = 0; 1428 mumps->id.listvar_schur = NULL; 1429 mumps->id.schur = NULL; 1430 mumps->sizeredrhs = 0; 1431 mumps->schur_sol = NULL; 1432 mumps->schur_sizesol = 0; 1433 PetscFunctionReturn(0); 1434 } 1435 1436 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps) 1437 { 1438 PetscErrorCode ierr; 1439 1440 PetscFunctionBegin; 1441 ierr = MPI_Bcast(mumps->id.infog, 40,MPIU_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 manual p82 */ 1442 ierr = MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); 1443 if (mumps->id.INFOG(1) < 0) { 1444 if (A->erroriffailure) { 1445 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1446 } else { 1447 if (mumps->id.INFOG(1) == -6) { 1448 ierr = PetscInfo2(F,"matrix is structurally singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1449 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 1450 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 1451 ierr = PetscInfo2(F,"workspace problem (out of memory), INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1452 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1453 } else { 1454 ierr = PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1455 F->factorerrortype = MAT_FACTOR_OTHER; 1456 } 1457 } 1458 } 1459 PetscFunctionReturn(0); 1460 } 1461 1462 /* Note the Petsc r (= c) permutation is used only when mumps->id.ICNTL(7)==1 with centralized assembled matrix input; otherwise r and c are ignored */ 1463 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1464 { 1465 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1466 PetscErrorCode ierr; 1467 Vec b; 1468 IS is_iden; 1469 const PetscInt M = A->rmap->N; 1470 1471 PetscFunctionBegin; 1472 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1473 1474 /* Set MUMPS options from the options database */ 1475 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1476 1477 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1478 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1479 1480 /* analysis phase */ 1481 /*----------------*/ 1482 mumps->id.job = JOB_FACTSYMBOLIC; 1483 mumps->id.n = M; 1484 switch (mumps->id.ICNTL(18)) { 1485 case 0: /* centralized assembled matrix input */ 1486 if (!mumps->myid) { 1487 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1488 if (mumps->id.ICNTL(6)>1) { 1489 mumps->id.a = (MumpsScalar*)mumps->val; 1490 } 1491 if (mumps->id.ICNTL(7) == 1) { /* use
user-provided matrix ordering - assuming r = c ordering */ 1492 /* 1493 PetscBool flag; 1494 ierr = ISEqual(r,c,&flag);CHKERRQ(ierr); 1495 if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"row_perm != col_perm"); 1496 ierr = ISView(r,PETSC_VIEWER_STDOUT_SELF); 1497 */ 1498 if (!mumps->myid) { 1499 const PetscInt *idx; 1500 PetscInt i,*perm_in; 1501 1502 ierr = PetscMalloc1(M,&perm_in);CHKERRQ(ierr); 1503 ierr = ISGetIndices(r,&idx);CHKERRQ(ierr); 1504 1505 mumps->id.perm_in = perm_in; 1506 for (i=0; i<M; i++) perm_in[i] = idx[i]+1; /* perm_in[] starts from 1, not 0! */ 1507 ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr); 1508 } 1509 } 1510 } 1511 break; 1512 case 3: /* distributed assembled matrix input (size>1) */ 1513 mumps->id.nz_loc = mumps->nz; 1514 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1515 if (mumps->id.ICNTL(6)>1) { 1516 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1517 } 1518 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1519 if (!mumps->myid) { 1520 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->N,&mumps->b_seq);CHKERRQ(ierr); 1521 ierr = ISCreateStride(PETSC_COMM_SELF,A->rmap->N,0,1,&is_iden);CHKERRQ(ierr); 1522 } else { 1523 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&mumps->b_seq);CHKERRQ(ierr); 1524 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_iden);CHKERRQ(ierr); 1525 } 1526 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1527 ierr = VecScatterCreateWithData(b,is_iden,mumps->b_seq,is_iden,&mumps->scat_rhs);CHKERRQ(ierr); 1528 ierr = ISDestroy(&is_iden);CHKERRQ(ierr); 1529 ierr = VecDestroy(&b);CHKERRQ(ierr); 1530 break; 1531 } 1532 PetscMUMPS_c(mumps); 1533 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1534 1535 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1536 F->ops->solve = MatSolve_MUMPS; 1537 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1538 F->ops->matsolve = MatMatSolve_MUMPS; 1539 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1540 PetscFunctionReturn(0); 1541 } 1542 1543 /* Note the Petsc r and c permutations are ignored */ 1544 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1545 { 1546 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1547 PetscErrorCode ierr; 1548 Vec b; 1549 IS is_iden; 1550 const PetscInt M = A->rmap->N; 1551 1552 PetscFunctionBegin; 1553 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1554 1555 /* Set MUMPS options from the options database */ 1556 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1557 1558 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1559 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1560 1561 /* analysis phase */ 1562 /*----------------*/ 1563 mumps->id.job = JOB_FACTSYMBOLIC; 1564 mumps->id.n = M; 1565 switch (mumps->id.ICNTL(18)) { 1566 case 0: /* centralized assembled matrix input */ 1567 if (!mumps->myid) { 1568 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1569 if (mumps->id.ICNTL(6)>1) { 1570 mumps->id.a = (MumpsScalar*)mumps->val; 1571 } 1572 } 1573 break; 1574 case 3: /* distributed assembled matrix input (size>1) */ 1575 mumps->id.nz_loc = mumps->nz; 1576 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1577 if (mumps->id.ICNTL(6)>1) { 1578 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1579 } 1580 /* MUMPS only supports centralized rhs.
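That is, MUMPS expects the complete right-hand side on the host rank, so the distributed PETSc vector must be gathered onto rank 0 at every solve.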
Create scatter scat_rhs for repeated use in MatSolve() */ 1581 if (!mumps->myid) { 1582 ierr = VecCreateSeq(PETSC_COMM_SELF,A->cmap->N,&mumps->b_seq);CHKERRQ(ierr); 1583 ierr = ISCreateStride(PETSC_COMM_SELF,A->cmap->N,0,1,&is_iden);CHKERRQ(ierr); 1584 } else { 1585 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&mumps->b_seq);CHKERRQ(ierr); 1586 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_iden);CHKERRQ(ierr); 1587 } 1588 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1589 ierr = VecScatterCreateWithData(b,is_iden,mumps->b_seq,is_iden,&mumps->scat_rhs);CHKERRQ(ierr); 1590 ierr = ISDestroy(&is_iden);CHKERRQ(ierr); 1591 ierr = VecDestroy(&b);CHKERRQ(ierr); 1592 break; 1593 } 1594 PetscMUMPS_c(mumps); 1595 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1596 1597 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1598 F->ops->solve = MatSolve_MUMPS; 1599 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1600 PetscFunctionReturn(0); 1601 } 1602 1603 /* Note the Petsc r permutation and factor info are ignored */ 1604 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info) 1605 { 1606 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1607 PetscErrorCode ierr; 1608 Vec b; 1609 IS is_iden; 1610 const PetscInt M = A->rmap->N; 1611 1612 PetscFunctionBegin; 1613 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1614 1615 /* Set MUMPS options from the options database */ 1616 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1617 1618 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1619 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1620 1621 /* analysis phase */ 1622 /*----------------*/ 1623 mumps->id.job = JOB_FACTSYMBOLIC; 1624 mumps->id.n = M; 1625 switch (mumps->id.ICNTL(18)) { 1626 case 0: /* centralized assembled matrix input */ 1627 if (!mumps->myid) { 1628 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1629 if (mumps->id.ICNTL(6)>1) { 1630 mumps->id.a = (MumpsScalar*)mumps->val; 1631 } 1632 } 1633 break; 1634 case 3: /* distributed assembled matrix input (size>1) */ 1635 mumps->id.nz_loc = mumps->nz; 1636 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1637 if (mumps->id.ICNTL(6)>1) { 1638 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1639 } 1640 /* MUMPS only supports centralized rhs. 
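(The sequential vector created below has length A->cmap->N, which equals A->rmap->N for the square matrices MUMPS factors.)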
Create scatter scat_rhs for repeated use in MatSolve() */ 1641 if (!mumps->myid) { 1642 ierr = VecCreateSeq(PETSC_COMM_SELF,A->cmap->N,&mumps->b_seq);CHKERRQ(ierr); 1643 ierr = ISCreateStride(PETSC_COMM_SELF,A->cmap->N,0,1,&is_iden);CHKERRQ(ierr); 1644 } else { 1645 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&mumps->b_seq);CHKERRQ(ierr); 1646 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_iden);CHKERRQ(ierr); 1647 } 1648 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1649 ierr = VecScatterCreateWithData(b,is_iden,mumps->b_seq,is_iden,&mumps->scat_rhs);CHKERRQ(ierr); 1650 ierr = ISDestroy(&is_iden);CHKERRQ(ierr); 1651 ierr = VecDestroy(&b);CHKERRQ(ierr); 1652 break; 1653 } 1654 PetscMUMPS_c(mumps); 1655 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1656 1657 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 1658 F->ops->solve = MatSolve_MUMPS; 1659 F->ops->solvetranspose = MatSolve_MUMPS; 1660 F->ops->matsolve = MatMatSolve_MUMPS; 1661 #if defined(PETSC_USE_COMPLEX) 1662 F->ops->getinertia = NULL; 1663 #else 1664 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 1665 #endif 1666 PetscFunctionReturn(0); 1667 } 1668 1669 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) 1670 { 1671 PetscErrorCode ierr; 1672 PetscBool iascii; 1673 PetscViewerFormat format; 1674 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1675 1676 PetscFunctionBegin; 1677 /* check if matrix is mumps type */ 1678 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0); 1679 1680 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1681 if (iascii) { 1682 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1683 if (format == PETSC_VIEWER_ASCII_INFO) { 1684 ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr); 1685 ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",mumps->id.sym);CHKERRQ(ierr); 1686 ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",mumps->id.par);CHKERRQ(ierr); 1687 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d \n",mumps->id.ICNTL(1));CHKERRQ(ierr); 1688 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d \n",mumps->id.ICNTL(2));CHKERRQ(ierr); 1689 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d \n",mumps->id.ICNTL(3));CHKERRQ(ierr); 1690 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",mumps->id.ICNTL(4));CHKERRQ(ierr); 1691 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",mumps->id.ICNTL(5));CHKERRQ(ierr); 1692 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",mumps->id.ICNTL(6));CHKERRQ(ierr); 1693 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d \n",mumps->id.ICNTL(7));CHKERRQ(ierr); 1694 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d \n",mumps->id.ICNTL(8));CHKERRQ(ierr); 1695 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",mumps->id.ICNTL(10));CHKERRQ(ierr); 1696 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",mumps->id.ICNTL(11));CHKERRQ(ierr); 1697 if (mumps->id.ICNTL(11)>0) { 1698 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr); 1699 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr); 1700 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(6) (inf norm of residual): 
%g\n",mumps->id.RINFOG(6));CHKERRQ(ierr); 1701 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr); 1702 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr); 1703 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr); 1704 } 1705 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",mumps->id.ICNTL(12));CHKERRQ(ierr); 1706 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (efficiency control): %d \n",mumps->id.ICNTL(13));CHKERRQ(ierr); 1707 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",mumps->id.ICNTL(14));CHKERRQ(ierr); 1708 /* ICNTL(15-17) not used */ 1709 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",mumps->id.ICNTL(18));CHKERRQ(ierr); 1710 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d \n",mumps->id.ICNTL(19));CHKERRQ(ierr); 1711 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(20) (rhs sparse pattern): %d \n",mumps->id.ICNTL(20));CHKERRQ(ierr); 1712 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d \n",mumps->id.ICNTL(21));CHKERRQ(ierr); 1713 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d \n",mumps->id.ICNTL(22));CHKERRQ(ierr); 1714 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory can be allocated locally):%d \n",mumps->id.ICNTL(23));CHKERRQ(ierr); 1715 1716 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d \n",mumps->id.ICNTL(24));CHKERRQ(ierr); 1717 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d \n",mumps->id.ICNTL(25));CHKERRQ(ierr); 1718 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for rhs or solution): %d \n",mumps->id.ICNTL(26));CHKERRQ(ierr); 1719 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(27) (experimental parameter): %d \n",mumps->id.ICNTL(27));CHKERRQ(ierr); 1720 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d \n",mumps->id.ICNTL(28));CHKERRQ(ierr); 1721 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d \n",mumps->id.ICNTL(29));CHKERRQ(ierr); 1722 1723 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d \n",mumps->id.ICNTL(30));CHKERRQ(ierr); 1724 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(31) (factors is discarded in the solve phase): %d \n",mumps->id.ICNTL(31));CHKERRQ(ierr); 1725 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d \n",mumps->id.ICNTL(33));CHKERRQ(ierr); 1726 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d \n",mumps->id.ICNTL(35));CHKERRQ(ierr); 1727 1728 ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1));CHKERRQ(ierr); 1729 ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr); 1730 ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3));CHKERRQ(ierr); 1731 ierr = PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4));CHKERRQ(ierr); 1732 ierr = PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g 
\n",mumps->id.CNTL(5));CHKERRQ(ierr); 1733 ierr = PetscViewerASCIIPrintf(viewer," CNTL(7) (dropping parameter for BLR): %g \n",mumps->id.CNTL(7));CHKERRQ(ierr); 1734 1735 /* infomation local to each processor */ 1736 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr); 1737 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1738 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr); 1739 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1740 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr); 1741 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr); 1742 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1743 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr); 1744 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr); 1745 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1746 1747 ierr = PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr); 1748 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr); 1749 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1750 1751 ierr = PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr); 1752 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr); 1753 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1754 1755 ierr = PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr); 1756 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr); 1757 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1758 1759 if (mumps->ninfo && mumps->ninfo <= 40){ 1760 PetscInt i; 1761 for (i=0; i<mumps->ninfo; i++){ 1762 ierr = PetscViewerASCIIPrintf(viewer, " INFO(%d): \n",mumps->info[i]);CHKERRQ(ierr); 1763 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(mumps->info[i]));CHKERRQ(ierr); 1764 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1765 } 1766 } 1767 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1768 1769 if (!mumps->myid) { /* information from the host */ 1770 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr); 1771 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr); 1772 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr); 1773 ierr = PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr); 1774 1775 ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(3));CHKERRQ(ierr); 1776 ierr = 
PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(4));CHKERRQ(ierr); 1777 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",mumps->id.INFOG(5));CHKERRQ(ierr); 1778 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",mumps->id.INFOG(6));CHKERRQ(ierr); 1779 ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively use after analysis): %d \n",mumps->id.INFOG(7));CHKERRQ(ierr); 1780 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",mumps->id.INFOG(8));CHKERRQ(ierr); 1781 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",mumps->id.INFOG(9));CHKERRQ(ierr); 1782 ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d \n",mumps->id.INFOG(10));CHKERRQ(ierr); 1783 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d \n",mumps->id.INFOG(11));CHKERRQ(ierr); 1784 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",mumps->id.INFOG(12));CHKERRQ(ierr); 1785 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",mumps->id.INFOG(13));CHKERRQ(ierr); 1786 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",mumps->id.INFOG(14));CHKERRQ(ierr); 1787 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",mumps->id.INFOG(15));CHKERRQ(ierr); 1788 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",mumps->id.INFOG(16));CHKERRQ(ierr); 1789 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",mumps->id.INFOG(17));CHKERRQ(ierr); 1790 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",mumps->id.INFOG(18));CHKERRQ(ierr); 1791 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",mumps->id.INFOG(19));CHKERRQ(ierr); 1792 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",mumps->id.INFOG(20));CHKERRQ(ierr); 1793 ierr = PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",mumps->id.INFOG(21));CHKERRQ(ierr); 1794 ierr = PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",mumps->id.INFOG(22));CHKERRQ(ierr); 1795 ierr = PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",mumps->id.INFOG(23));CHKERRQ(ierr); 1796 ierr = PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",mumps->id.INFOG(24));CHKERRQ(ierr); 1797 ierr = PetscViewerASCIIPrintf(viewer," INFOG(25) (after 
factorization: number of pivots modified by static pivoting): %d \n",mumps->id.INFOG(25));CHKERRQ(ierr); 1798 ierr = PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));CHKERRQ(ierr); 1799 ierr = PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));CHKERRQ(ierr); 1800 ierr = PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));CHKERRQ(ierr); 1801 ierr = PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));CHKERRQ(ierr); 1802 ierr = PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));CHKERRQ(ierr); 1803 ierr = PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));CHKERRQ(ierr); 1804 } 1805 } 1806 } 1807 PetscFunctionReturn(0); 1808 } 1809 1810 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info) 1811 { 1812 Mat_MUMPS *mumps =(Mat_MUMPS*)A->data; 1813 1814 PetscFunctionBegin; 1815 info->block_size = 1.0; 1816 info->nz_allocated = mumps->id.INFOG(20); 1817 info->nz_used = mumps->id.INFOG(20); 1818 info->nz_unneeded = 0.0; 1819 info->assemblies = 0.0; 1820 info->mallocs = 0.0; 1821 info->memory = 0.0; 1822 info->fill_ratio_given = 0; 1823 info->fill_ratio_needed = 0; 1824 info->factor_mallocs = 0; 1825 PetscFunctionReturn(0); 1826 } 1827 1828 /* -------------------------------------------------------------------------------------------*/ 1829 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 1830 { 1831 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1832 const PetscInt *idxs; 1833 PetscInt size,i; 1834 PetscErrorCode ierr; 1835 1836 PetscFunctionBegin; 1837 ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr); 1838 if (mumps->petsc_size > 1) { 1839 PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */ 1840 1841 ls = mumps->myid ? (size ? 
PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 1842 ierr = MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);CHKERRQ(ierr); 1843 if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported by PETSc\n"); 1844 } 1845 if (mumps->id.size_schur != size) { 1846 ierr = PetscFree2(mumps->id.listvar_schur,mumps->id.schur);CHKERRQ(ierr); 1847 mumps->id.size_schur = size; 1848 mumps->id.schur_lld = size; 1849 ierr = PetscMalloc2(size,&mumps->id.listvar_schur,size*size,&mumps->id.schur);CHKERRQ(ierr); 1850 } 1851 1852 /* Schur complement matrix */ 1853 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.size_schur,(PetscScalar*)mumps->id.schur,&F->schur);CHKERRQ(ierr); 1854 if (mumps->sym == 1) { 1855 ierr = MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 1856 } 1857 1858 /* MUMPS expects Fortran style indices */ 1859 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 1860 ierr = PetscMemcpy(mumps->id.listvar_schur,idxs,size*sizeof(PetscInt));CHKERRQ(ierr); 1861 for (i=0;i<size;i++) mumps->id.listvar_schur[i]++; 1862 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 1863 if (mumps->petsc_size > 1) { 1864 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 1865 } else { 1866 if (F->factortype == MAT_FACTOR_LU) { 1867 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 1868 } else { 1869 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 1870 } 1871 } 1872 /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */ 1873 mumps->id.ICNTL(26) = -1; 1874 PetscFunctionReturn(0); 1875 } 1876 1877 /* -------------------------------------------------------------------------------------------*/ 1878 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S) 1879 { 1880 Mat St; 1881 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1882 PetscScalar *array; 1883 #if defined(PETSC_USE_COMPLEX) 1884 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 1885 #endif 1886 PetscErrorCode ierr; 1887 1888 PetscFunctionBegin; 1889 if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected!
You should call MatFactorSetSchurIS to enable it"); 1890 ierr = MatCreate(PETSC_COMM_SELF,&St);CHKERRQ(ierr); 1891 ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);CHKERRQ(ierr); 1892 ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr); 1893 ierr = MatSetUp(St);CHKERRQ(ierr); 1894 ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr); 1895 if (!mumps->sym) { /* MUMPS always returns a full matrix */ 1896 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1897 PetscInt i,j,N=mumps->id.size_schur; 1898 for (i=0;i<N;i++) { 1899 for (j=0;j<N;j++) { 1900 #if !defined(PETSC_USE_COMPLEX) 1901 PetscScalar val = mumps->id.schur[i*N+j]; 1902 #else 1903 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 1904 #endif 1905 array[j*N+i] = val; 1906 } 1907 } 1908 } else { /* stored by columns */ 1909 ierr = PetscMemcpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur*sizeof(PetscScalar));CHKERRQ(ierr); 1910 } 1911 } else { /* either full or lower-triangular (not packed) */ 1912 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 1913 PetscInt i,j,N=mumps->id.size_schur; 1914 for (i=0;i<N;i++) { 1915 for (j=i;j<N;j++) { 1916 #if !defined(PETSC_USE_COMPLEX) 1917 PetscScalar val = mumps->id.schur[i*N+j]; 1918 #else 1919 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 1920 #endif 1921 array[i*N+j] = val; 1922 array[j*N+i] = val; 1923 } 1924 } 1925 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 1926 ierr = PetscMemcpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur*sizeof(PetscScalar));CHKERRQ(ierr); 1927 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 1928 PetscInt i,j,N=mumps->id.size_schur; 1929 for (i=0;i<N;i++) { 1930 for (j=0;j<i+1;j++) { 1931 #if !defined(PETSC_USE_COMPLEX) 1932 PetscScalar val = mumps->id.schur[i*N+j]; 1933 #else 1934 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 1935 #endif 1936 array[i*N+j] = val; 1937 array[j*N+i] = val; 1938 } 1939 } 1940 } 1941 } 1942 ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr); 1943 *S = St; 1944 PetscFunctionReturn(0); 1945 } 1946 1947 /* -------------------------------------------------------------------------------------------*/ 1948 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival) 1949 { 1950 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1951 1952 PetscFunctionBegin; 1953 mumps->id.ICNTL(icntl) = ival; 1954 PetscFunctionReturn(0); 1955 } 1956 1957 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival) 1958 { 1959 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1960 1961 PetscFunctionBegin; 1962 *ival = mumps->id.ICNTL(icntl); 1963 PetscFunctionReturn(0); 1964 } 1965 1966 /*@ 1967 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 1968 1969 Logically Collective on Mat 1970 1971 Input Parameters: 1972 + F - the factored matrix obtained by calling MatGetFactor() from the PETSc-MUMPS interface 1973 . icntl - index of MUMPS parameter array ICNTL() 1974 - ival - value of MUMPS ICNTL(icntl) 1975 1976 Options Database: 1977 . -mat_mumps_icntl_<icntl> <ival> 1978 1979 Level: beginner 1980 1981 References: 1982 .
MUMPS Users' Guide 1983 1984 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 1985 @*/ 1986 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival) 1987 { 1988 PetscErrorCode ierr; 1989 1990 PetscFunctionBegin; 1991 PetscValidType(F,1); 1992 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 1993 PetscValidLogicalCollectiveInt(F,icntl,2); 1994 PetscValidLogicalCollectiveInt(F,ival,3); 1995 ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr); 1996 PetscFunctionReturn(0); 1997 } 1998 1999 /*@ 2000 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2001 2002 Logically Collective on Mat 2003 2004 Input Parameters: 2005 + F - the factored matrix obtained by calling MatGetFactor() from the PETSc-MUMPS interface 2006 - icntl - index of MUMPS parameter array ICNTL() 2007 2008 Output Parameter: 2009 . ival - value of MUMPS ICNTL(icntl) 2010 2011 Level: beginner 2012 2013 References: 2014 . MUMPS Users' Guide 2015 2016 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2017 @*/ 2018 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival) 2019 { 2020 PetscErrorCode ierr; 2021 2022 PetscFunctionBegin; 2023 PetscValidType(F,1); 2024 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2025 PetscValidLogicalCollectiveInt(F,icntl,2); 2026 PetscValidIntPointer(ival,3); 2027 ierr = PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2028 PetscFunctionReturn(0); 2029 } 2030 2031 /* -------------------------------------------------------------------------------------------*/ 2032 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val) 2033 { 2034 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2035 2036 PetscFunctionBegin; 2037 mumps->id.CNTL(icntl) = val; 2038 PetscFunctionReturn(0); 2039 } 2040 2041 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val) 2042 { 2043 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2044 2045 PetscFunctionBegin; 2046 *val = mumps->id.CNTL(icntl); 2047 PetscFunctionReturn(0); 2048 } 2049 2050 /*@ 2051 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2052 2053 Logically Collective on Mat 2054 2055 Input Parameters: 2056 + F - the factored matrix obtained by calling MatGetFactor() from the PETSc-MUMPS interface 2057 . icntl - index of MUMPS parameter array CNTL() 2058 - val - value of MUMPS CNTL(icntl) 2059 2060 Options Database: 2061 . -mat_mumps_cntl_<icntl> <val> 2062 2063 Level: beginner 2064 2065 References: 2066 .
MUMPS Users' Guide 2067 2068 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2069 @*/ 2070 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val) 2071 { 2072 PetscErrorCode ierr; 2073 2074 PetscFunctionBegin; 2075 PetscValidType(F,1); 2076 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2077 PetscValidLogicalCollectiveInt(F,icntl,2); 2078 PetscValidLogicalCollectiveReal(F,val,3); 2079 ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr); 2080 PetscFunctionReturn(0); 2081 } 2082 2083 /*@ 2084 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2085 2086 Logically Collective on Mat 2087 2088 Input Parameters: 2089 + F - the factored matrix obtained by calling MatGetFactor() from the PETSc-MUMPS interface 2090 - icntl - index of MUMPS parameter array CNTL() 2091 2092 Output Parameter: 2093 . val - value of MUMPS CNTL(icntl) 2094 2095 Level: beginner 2096 2097 References: 2098 . MUMPS Users' Guide 2099 2100 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2101 @*/ 2102 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val) 2103 { 2104 PetscErrorCode ierr; 2105 2106 PetscFunctionBegin; 2107 PetscValidType(F,1); 2108 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2109 PetscValidLogicalCollectiveInt(F,icntl,2); 2110 PetscValidRealPointer(val,3); 2111 ierr = PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2112 PetscFunctionReturn(0); 2113 } 2114 2115 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info) 2116 { 2117 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2118 2119 PetscFunctionBegin; 2120 *info = mumps->id.INFO(icntl); 2121 PetscFunctionReturn(0); 2122 } 2123 2124 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog) 2125 { 2126 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2127 2128 PetscFunctionBegin; 2129 *infog = mumps->id.INFOG(icntl); 2130 PetscFunctionReturn(0); 2131 } 2132 2133 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo) 2134 { 2135 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2136 2137 PetscFunctionBegin; 2138 *rinfo = mumps->id.RINFO(icntl); 2139 PetscFunctionReturn(0); 2140 } 2141 2142 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog) 2143 { 2144 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2145 2146 PetscFunctionBegin; 2147 *rinfog = mumps->id.RINFOG(icntl); 2148 PetscFunctionReturn(0); 2149 } 2150 2151 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS) 2152 { 2153 PetscErrorCode ierr; 2154 Mat Bt = NULL,Btseq = NULL; 2155 PetscBool flg; 2156 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2157 PetscScalar *aa; 2158 PetscInt spnr,*ia,*ja; 2159 2160 PetscFunctionBegin; 2161 PetscValidHeaderSpecific(spRHS,MAT_CLASSID,2); 2162 ierr = PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);CHKERRQ(ierr); 2163 if (flg) { 2164 ierr = MatTransposeGetMat(spRHS,&Bt);CHKERRQ(ierr); 2165 } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be of type MATTRANSPOSEMAT"); 2166 2167 ierr = MatMumpsSetIcntl(F,30,1);CHKERRQ(ierr); 2168 2169 if (mumps->petsc_size > 1) { 2170 Mat_MPIAIJ *b =
(Mat_MPIAIJ*)Bt->data; 2171 Btseq = b->A; 2172 } else { 2173 Btseq = Bt; 2174 } 2175 2176 if (!mumps->myid) { 2177 ierr = MatSeqAIJGetArray(Btseq,&aa);CHKERRQ(ierr); 2178 ierr = MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2179 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2180 2181 mumps->id.irhs_ptr = ia; 2182 mumps->id.irhs_sparse = ja; 2183 mumps->id.nz_rhs = ia[spnr] - 1; 2184 mumps->id.rhs_sparse = (MumpsScalar*)aa; 2185 } else { 2186 mumps->id.irhs_ptr = NULL; 2187 mumps->id.irhs_sparse = NULL; 2188 mumps->id.nz_rhs = 0; 2189 mumps->id.rhs_sparse = NULL; 2190 } 2191 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2192 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2193 2194 /* solve phase */ 2195 /*-------------*/ 2196 mumps->id.job = JOB_SOLVE; 2197 PetscMUMPS_c(mumps); 2198 if (mumps->id.INFOG(1) < 0) 2199 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 2200 2201 if (!mumps->myid) { 2202 ierr = MatSeqAIJRestoreArray(Btseq,&aa);CHKERRQ(ierr); 2203 ierr = MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2204 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2205 } 2206 PetscFunctionReturn(0); 2207 } 2208 2209 /*@ 2210 MatMumpsGetInverse - Get a user-specified set of entries in the inverse of A 2211 2212 Logically Collective on Mat 2213 2214 Input Parameters: 2215 + F - the factored matrix obtained by calling MatGetFactor() from the PETSc-MUMPS interface 2216 - spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding the specified indices on process 0 2217 2218 Output Parameter: 2219 . spRHS - requested entries of the inverse of A 2220 2221 Level: beginner 2222 2223 References: 2224 . MUMPS Users' Guide 2225 2226 .seealso: MatGetFactor(), MatCreateTranspose() 2227 @*/ 2228 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS) 2229 { 2230 PetscErrorCode ierr; 2231 2232 PetscFunctionBegin; 2233 PetscValidType(F,1); 2234 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2235 ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr); 2236 PetscFunctionReturn(0); 2237 } 2238 2239 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST) 2240 { 2241 PetscErrorCode ierr; 2242 Mat spRHS; 2243 2244 PetscFunctionBegin; 2245 ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr); 2246 ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr); 2247 ierr = MatDestroy(&spRHS);CHKERRQ(ierr); 2248 PetscFunctionReturn(0); 2249 } 2250 2251 /*@ 2252 MatMumpsGetInverseTranspose - Get a user-specified set of entries in the inverse of matrix A^T 2253 2254 Logically Collective on Mat 2255 2256 Input Parameters: 2257 + F - the factored matrix of A obtained by calling MatGetFactor() from the PETSc-MUMPS interface 2258 - spRHST - sequential sparse matrix in MATAIJ format holding the specified indices of A^T on process 0 2259 2260 Output Parameter: 2261 . spRHST - requested entries of the inverse of A^T 2262 2263 Level: beginner 2264 2265 References: 2266 .
MUMPS Users' Guide 2267 2268 .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse() 2269 @*/ 2270 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST) 2271 { 2272 PetscErrorCode ierr; 2273 PetscBool flg; 2274 2275 PetscFunctionBegin; 2276 PetscValidType(F,1); 2277 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2278 ierr = PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 2279 if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be a MATAIJ matrix"); 2280 2281 ierr = PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));CHKERRQ(ierr); 2282 PetscFunctionReturn(0); 2283 } 2284 2285 /*@ 2286 MatMumpsGetInfo - Get MUMPS parameter INFO() 2287 2288 Logically Collective on Mat 2289 2290 Input Parameters: 2291 + F - the factored matrix obtained by calling MatGetFactor() from the PETSc-MUMPS interface 2292 - icntl - index of MUMPS parameter array INFO() 2293 2294 Output Parameter: 2295 . ival - value of MUMPS INFO(icntl) 2296 2297 Level: beginner 2298 2299 References: 2300 . MUMPS Users' Guide 2301 2302 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2303 @*/ 2304 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival) 2305 { 2306 PetscErrorCode ierr; 2307 2308 PetscFunctionBegin; 2309 PetscValidType(F,1); 2310 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2311 PetscValidIntPointer(ival,3); 2312 ierr = PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2313 PetscFunctionReturn(0); 2314 } 2315 2316 /*@ 2317 MatMumpsGetInfog - Get MUMPS parameter INFOG() 2318 2319 Logically Collective on Mat 2320 2321 Input Parameters: 2322 + F - the factored matrix obtained by calling MatGetFactor() from the PETSc-MUMPS interface 2323 - icntl - index of MUMPS parameter array INFOG() 2324 2325 Output Parameter: 2326 . ival - value of MUMPS INFOG(icntl) 2327 2328 Level: beginner 2329 2330 References: 2331 . MUMPS Users' Guide 2332 2333 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2334 @*/ 2335 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival) 2336 { 2337 PetscErrorCode ierr; 2338 2339 PetscFunctionBegin; 2340 PetscValidType(F,1); 2341 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2342 PetscValidIntPointer(ival,3); 2343 ierr = PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2344 PetscFunctionReturn(0); 2345 } 2346 2347 /*@ 2348 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 2349 2350 Logically Collective on Mat 2351 2352 Input Parameters: 2353 + F - the factored matrix obtained by calling MatGetFactor() from the PETSc-MUMPS interface 2354 - icntl - index of MUMPS parameter array RINFO() 2355 2356 Output Parameter: 2357 . val - value of MUMPS RINFO(icntl) 2358 2359 Level: beginner 2360 2361 References: 2362 .
MUMPS Users' Guide 2363 2364 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2365 @*/ 2366 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val) 2367 { 2368 PetscErrorCode ierr; 2369 2370 PetscFunctionBegin; 2371 PetscValidType(F,1); 2372 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2373 PetscValidRealPointer(val,3); 2374 ierr = PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2375 PetscFunctionReturn(0); 2376 } 2377 2378 /*@ 2379 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 2380 2381 Logically Collective on Mat 2382 2383 Input Parameters: 2384 + F - the factored matrix obtained by calling MatGetFactor() from the PETSc-MUMPS interface 2385 - icntl - index of MUMPS parameter array RINFOG() 2386 2387 Output Parameter: 2388 . val - value of MUMPS RINFOG(icntl) 2389 2390 Level: beginner 2391 2392 References: 2393 . MUMPS Users' Guide 2394 2395 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2396 @*/ 2397 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val) 2398 { 2399 PetscErrorCode ierr; 2400 2401 PetscFunctionBegin; 2402 PetscValidType(F,1); 2403 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2404 PetscValidRealPointer(val,3); 2405 ierr = PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2406 PetscFunctionReturn(0); 2407 } 2408 2409 /*MC 2410 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 2411 distributed and sequential matrices via the external package MUMPS. 2412 2413 Works with MATAIJ and MATSBAIJ matrices 2414 2415 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 2416 2417 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below. 2418 2419 Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver 2420 2421 Options Database Keys: 2422 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 2423 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warnings 2424 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 2425 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 2426 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7) 2427 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis 2428 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 2429 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 2430 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 2431 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 2432 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 2433 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 2434 .
-mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 2435 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 2436 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can be allocated per processor 2437 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 2438 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 2439 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix is present 2440 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering 2441 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 2442 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 2443 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 2444 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 2445 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 2446 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion for iterative refinement 2447 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 2448 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 2449 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 2450 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) were called before calling MUMPS. 2451 By default m is the smaller of the number of cores per CPU package (socket) reported by hwloc, as suggested by the MUMPS manual, and the number of PETSc MPI processes on the node. 2452 2453 Level: beginner 2454 2455 Notes: 2456 When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling 2457 $ KSPGetPC(ksp,&pc); 2458 $ PCFactorGetMatrix(pc,&mat); 2459 $ MatMumpsGetInfo(mat,....); 2460 $ MatMumpsGetInfog(mat,....); etc. 2461 Or you can run with -ksp_error_if_not_converged and the program will be stopped with the information printed in the error message. 2462 2463 If you want to run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still want to run the non-MUMPS part 2464 (i.e., the PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc 2465 (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 2466 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or the open-source 2467 OpenBLAS (PETSc has configure options to install/specify them). With these conditions met, you can run your program as before but with 2468 an extra option -mat_mumps_use_omp_threads [m]. It works as if we set OMP_NUM_THREADS=m for MUMPS, with m defaulting to the number of cores 2469 per CPU socket (or package, in hwloc terms), or the number of PETSc MPI processes on a node, whichever is smaller. 2470 2471 Flat-MPI (aka pure-MPI) mode means you run your code with as many MPI ranks as there are cores. For example, 2472 if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test". To run MPI+OpenMP hybrid MUMPS, 2473 the traditional way is to set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 2474 threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
The problem with this approach is that the non-MUMPS 2475 part of your code runs on fewer cores, so CPUs are wasted. "-mat_mumps_use_omp_threads [m]" provides an alternative such that 2476 you can still run your code with as many MPI ranks as the number of cores, but have MUMPS run in MPI+OpenMP hybrid mode. In our example, 2477 you can use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16". 2478 2479 If you run your code through a job submission system, there are caveats about MPI rank mapping. We use MPI_Comm_split_type to get the MPI 2480 processes on each compute node. Listing the processes in ascending rank order, we split the processes on a node into consecutive groups of 2481 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and the others in the omp_comm 2482 are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slave ranks are not. We free the CPUs assigned to slaves (which might be set 2483 by CPU binding policies in job scripts) and make those CPUs available to the master so that the OpenMP threads spawned by MUMPS can run on them. 2484 On a multi-socket compute node, MPI rank mapping is an issue. Continuing the above example, suppose your compute node has two sockets: 2485 if you interleave MPI ranks across the two sockets, in other words, even ranks are placed on socket 0 and odd ranks on socket 1, and bind 2486 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16 a master rank (and the threads it spawns) will use half of its cores in socket 0 and half 2487 in socket 1, which definitely hurts locality. On the other hand, if you map MPI ranks consecutively onto the two sockets, the 2488 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 2489 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 2490 examine the mapping result. 2491 2492 PETSc does not control thread binding in MUMPS. So to get the best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts, 2493 for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts, as PETSc 2494 calls omp_set_num_threads(m) internally before calling MUMPS. 2495 2496 References: 2497 + 1. - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 2498 - 2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
2499 2500 .seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCGetFactor(), PCFactorGetMatrix() 2501 2502 M*/ 2503 2504 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type) 2505 { 2506 PetscFunctionBegin; 2507 *type = MATSOLVERMUMPS; 2508 PetscFunctionReturn(0); 2509 } 2510 2511 /* MatGetFactor for Seq and MPI AIJ matrices */ 2512 static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F) 2513 { 2514 Mat B; 2515 PetscErrorCode ierr; 2516 Mat_MUMPS *mumps; 2517 PetscBool isSeqAIJ; 2518 2519 PetscFunctionBegin; 2520 /* Create the factorization matrix */ 2521 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr); 2522 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2523 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2524 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2525 ierr = MatSetUp(B);CHKERRQ(ierr); 2526 2527 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2528 2529 B->ops->view = MatView_MUMPS; 2530 B->ops->getinfo = MatGetInfo_MUMPS; 2531 2532 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2533 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2534 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2535 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2536 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2537 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2538 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2539 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2540 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2541 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2542 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2543 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2544 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2545 2546 if (ftype == MAT_FACTOR_LU) { 2547 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 2548 B->factortype = MAT_FACTOR_LU; 2549 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 2550 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 2551 mumps->sym = 0; 2552 } else { 2553 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 2554 B->factortype = MAT_FACTOR_CHOLESKY; 2555 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 2556 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 2557 #if defined(PETSC_USE_COMPLEX) 2558 mumps->sym = 2; 2559 #else 2560 if (A->spd_set && A->spd) mumps->sym = 1; 2561 else
mumps->sym = 2; 2562 #endif 2563 } 2564 2565 /* set solvertype */ 2566 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2567 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2568 2569 B->ops->destroy = MatDestroy_MUMPS; 2570 B->data = (void*)mumps; 2571 2572 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2573 2574 *F = B; 2575 PetscFunctionReturn(0); 2576 } 2577 2578 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 2579 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F) 2580 { 2581 Mat B; 2582 PetscErrorCode ierr; 2583 Mat_MUMPS *mumps; 2584 PetscBool isSeqSBAIJ; 2585 2586 PetscFunctionBegin; 2587 if (ftype != MAT_FACTOR_CHOLESKY) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Cannot use PETSc SBAIJ matrices with MUMPS LU, use AIJ matrix"); 2588 if (A->rmap->bs > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Cannot use PETSc SBAIJ matrices with block size > 1 with MUMPS Cholesky, use AIJ matrix instead"); 2589 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr); 2590 /* Create the factorization matrix */ 2591 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2592 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2593 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2594 ierr = MatSetUp(B);CHKERRQ(ierr); 2595 2596 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2597 if (isSeqSBAIJ) { 2598 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 2599 } else { 2600 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 2601 } 2602 2603 B->ops->getinfo = MatGetInfo_External; 2604 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 2605 B->ops->view = MatView_MUMPS; 2606 2607 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2608 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2609 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2610 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2611 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2612 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2613 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2614 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2615 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2616 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2617 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2618 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2619 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2620 2621 B->factortype = MAT_FACTOR_CHOLESKY; 2622 #if defined(PETSC_USE_COMPLEX) 2623 mumps->sym = 2; 2624 #else 2625 if (A->spd_set && A->spd) mumps->sym = 1; 2626 else mumps->sym = 2; 2627 #endif 

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}

/* MatGetFactor for Seq and MPI BAIJ matrices */
static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqBAIJ;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
    mumps->sym = 0; /* MUMPS SYM=0: unsymmetric */
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");

  B->ops->getinfo = MatGetInfo_External;
  B->ops->view    = MatView_MUMPS;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}

/* MatGetFactor for Seq and MPI SELL matrices */
static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqSELL;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
    else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
    mumps->sym = 0; /* MUMPS SYM=0: unsymmetric */
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}

PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
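
/*
   A minimal usage sketch (illustration only, not compiled as part of this file) of obtaining a MUMPS
   factorization directly through the factor interface registered above. Here A is assumed to be a
   previously assembled AIJ matrix, and b and x conforming vectors:

     Mat           F;
     MatFactorInfo info;
     ierr = MatFactorInfoInitialize(&info);CHKERRQ(ierr);
     ierr = MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);CHKERRQ(ierr);
     ierr = MatLUFactorSymbolic(F,A,NULL,NULL,&info);CHKERRQ(ierr); ** NULL orderings: MUMPS computes its own ordering **
     ierr = MatLUFactorNumeric(F,A,&info);CHKERRQ(ierr);
     ierr = MatSolve(F,b,x);CHKERRQ(ierr);
     ierr = MatDestroy(&F);CHKERRQ(ierr);
*/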