/*
    Provides an interface to the MUMPS sparse solver
*/

#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sell/mpi/mpisell.h>

EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#include <cmumps_c.h>
#else
#include <zmumps_c.h>
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#include <smumps_c.h>
#else
#include <dmumps_c.h>
#endif
#endif
EXTERN_C_END
#define JOB_INIT -1
#define JOB_FACTSYMBOLIC 1
#define JOB_FACTNUMERIC 2
#define JOB_SOLVE 3
#define JOB_END -2

/* calls to MUMPS */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c cmumps_c
#else
#define MUMPS_c zmumps_c
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c smumps_c
#else
#define MUMPS_c dmumps_c
#endif
#endif

/* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore the CPUs the master ranks can run on */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
#define PetscMUMPS_c(mumps) \
  do { \
    if (mumps->use_petsc_omp_support) { \
      if (mumps->is_omp_master) { \
        ierr = PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl);CHKERRQ(ierr); \
        MUMPS_c(&mumps->id); \
        ierr = PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl);CHKERRQ(ierr); \
      } \
      ierr = PetscOmpCtrlBarrier(mumps->omp_ctrl);CHKERRQ(ierr); \
    } else { \
      MUMPS_c(&mumps->id); \
    } \
  } while(0)
#else
#define PetscMUMPS_c(mumps) \
  do { MUMPS_c(&mumps->id); } while (0)
#endif

/* declare MumpsScalar */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MumpsScalar mumps_complex
#else
#define MumpsScalar mumps_double_complex
#endif
#else
#define MumpsScalar PetscScalar
#endif

/* macros such that indices match MUMPS documentation */
#define ICNTL(I) icntl[(I)-1]
#define CNTL(I) cntl[(I)-1]
#define INFOG(I) infog[(I)-1]
#define INFO(I) info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I) rinfo[(I)-1]
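/*
   For example, the MUMPS manual's 1-based ICNTL(24) maps onto the 0-based C array:

     mumps->id.ICNTL(24) = 1;    expands to    mumps->id.icntl[23] = 1;

   so the code below can be compared line-by-line against the MUMPS documentation.
*/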
typedef struct {
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;
#else
  ZMUMPS_STRUC_C id;
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
#else
  DMUMPS_STRUC_C id;
#endif
#endif

  MatStructure matstruc;
  PetscMPIInt  myid,petsc_size;
  PetscInt     *irn,*jcn,nz,sym;
  PetscScalar  *val;
  MPI_Comm     mumps_comm;
  PetscInt     ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter   scat_rhs, scat_sol; /* used by MatSolve() */
  Vec          b_seq,x_seq;
  PetscInt     ninfo,*info;        /* which INFO values to display */
  PetscInt     sizeredrhs;
  PetscScalar  *schur_sol;
  PetscInt     schur_sizesol;

  PetscBool    use_petsc_omp_support;
  PetscOmpCtrl omp_ctrl;            /* an OpenMP controller through which blocked processes release their CPUs (MPI_Barrier does not give that guarantee) */
  MPI_Comm     petsc_comm,omp_comm; /* petsc_comm is the PETSc matrix's comm */
  PetscMPIInt  mpinz;               /* on master rank, nz = sum(mpinz) over omp_comm; on other ranks, mpinz = nz */
  PetscMPIInt  omp_comm_size;
  PetscBool    is_omp_master;       /* is this rank the master of omp_comm? */
  PetscMPIInt  *recvcount,*displs;

  PetscErrorCode (*ConvertToTriples)(Mat, int, MatReuse, int*, int**, int**, PetscScalar**);
} Mat_MUMPS;

extern PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*);

static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscFree2(mumps->id.listvar_schur,mumps->id.schur);CHKERRQ(ierr);
  ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
  ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);

  mumps->id.size_schur = 0;
  mumps->id.schur_lld  = 0;
  mumps->id.ICNTL(19)  = 0;
  PetscFunctionReturn(0);
}
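/*
   Sketch of the Schur reduced-RHS workflow implemented by the two routines below
   (cf. the MUMPS manual on ICNTL(26)):

     id.ICNTL(26) = 1;   JOB_SOLVE condenses the rhs onto the Schur variables (id.redrhs)
     ...                 we solve S*y = redrhs ourselves, overwriting redrhs
     id.ICNTL(26) = 2;   JOB_SOLVE expands redrhs back into the full solution
*/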
/* solve with rhs in mumps->id.redrhs and return in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS            *mumps=(Mat_MUMPS*)F->data;
  Mat                  S,B,X;
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;
  PetscErrorCode       ierr;

  PetscFunctionBegin;
  ierr = MatFactorFactorizeSchurComplement(F);CHKERRQ(ierr);
  ierr = MatFactorGetSchurComplement(F,&S,&schurstatus);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);CHKERRQ(ierr);
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);CHKERRQ(ierr);
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatMatSolveTranspose(S,B,X);CHKERRQ(ierr);
    } else {
      ierr = MatMatSolve(S,B,X);CHKERRQ(ierr);
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    sizesol = mumps->id.nrhs*mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
      ierr = PetscMalloc1(sizesol,&mumps->schur_sol);CHKERRQ(ierr);
      mumps->schur_sizesol = sizesol;
    }
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);CHKERRQ(ierr);
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatTransposeMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr);
    } else {
      ierr = MatMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr);
    }
    ierr = MatCopy(X,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %D",F->schur_status);
    break;
  }
  ierr = MatFactorRestoreSchurComplement(F,&S,schurstatus);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = MatDestroy(&X);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)F->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
    PetscFunctionReturn(0);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = mumps->id.size_schur;
      ierr = PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);CHKERRQ(ierr);
      mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs;
    }
    mumps->id.ICNTL(26) = 1; /* condensation phase */
  } else { /* prepare for the expansion step */
    /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
    ierr = MatMumpsSolveSchur_Private(F);CHKERRQ(ierr);
    mumps->id.ICNTL(26) = 2; /* expansion phase */
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free MUMPS internal array for redrhs if we have solved for multiple rhs, in order to save memory space */
    if (mumps->id.nrhs > 1) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = 0;
      mumps->sizeredrhs = 0;
    }
  }
  PetscFunctionReturn(0);
}

/*
  MatConvertToTriples_A_B - convert a PETSc matrix to triples: row[nz], col[nz], val[nz]

  input:
    A       - matrix in aij, baij or sbaij (bs=1) format
    shift   - 0: C style output triples; 1: Fortran style output triples
    reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triples
              MAT_REUSE_MATRIX:   only the values in the v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and col indices, matrix values (matrix triples)

  The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
  freed with PetscFree(mumps->irn). This is not ideal code; the fact that v is ONLY sometimes part of mumps->irn means
  that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
*/
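/*
   Illustration (not from the MUMPS manual): with shift=1, the 2x2 matrix
     [ 10  0 ]
     [  3  4 ]
   stored in AIJ format becomes the Fortran-indexed triples
     r = {1, 2, 2},  c = {1, 1, 2},  v = {10, 3, 4}
   which is exactly the assembled COO input MUMPS expects in irn/jcn/a.
*/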
PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscInt *ai,*aj,*ajj,M=A->rmap->n;
  PetscInt       nz,rnz,i,j;
  PetscErrorCode ierr;
  PetscInt       *row,*col;
  Mat_SeqAIJ     *aa=(Mat_SeqAIJ*)A->data;

  PetscFunctionBegin;
  *v = aa->a;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz;
    ai   = aa->i;
    aj   = aa->j;
    *nnz = nz;
    ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr);
    col  = row + nz;

    nz = 0;
    for (i=0; i<M; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        row[nz] = i+shift; col[nz++] = ajj[j] + shift;
      }
    }
    *r = row; *c = col;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
  PetscInt    *ptr;

  PetscFunctionBegin;
  *v = a->val;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscInt       nz,i,j,row;
    PetscErrorCode ierr;

    nz   = a->sliidx[a->totalslices];
    *nnz = nz;
    ierr = PetscMalloc1(2*nz, &ptr);CHKERRQ(ierr);
    *r   = ptr;
    *c   = ptr + nz;

    for (i=0; i<a->totalslices; i++) {
      for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
        *ptr++ = 8*i + row + shift;
      }
    }
    for (i=0; i<nz; i++) *ptr++ = a->colidx[i] + shift;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  Mat_SeqBAIJ    *aa=(Mat_SeqBAIJ*)A->data;
  const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2;
  PetscInt       bs,M,nz,idx=0,rnz,i,j,k,m;
  PetscErrorCode ierr;
  PetscInt       *row,*col;

  PetscFunctionBegin;
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  M    = A->rmap->N/bs;
  *v   = aa->a;
  if (reuse == MAT_INITIAL_MATRIX) {
    ai   = aa->i; aj = aa->j;
    nz   = bs2*aa->nz;
    *nnz = nz;
    ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr);
    col  = row + nz;

    for (i=0; i<M; i++) {
      ajj = aj + ai[i];
      rnz = ai[i+1] - ai[i];
      for (k=0; k<rnz; k++) {
        for (j=0; j<bs; j++) {
          for (m=0; m<bs; m++) {
            row[idx]   = i*bs + m + shift;
            col[idx++] = bs*(ajj[k]) + j + shift;
          }
        }
      }
    }
    *r = row; *c = col;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscInt *ai, *aj,*ajj,M=A->rmap->n;
  PetscInt       nz,rnz,i,j;
  PetscErrorCode ierr;
  PetscInt       *row,*col;
  Mat_SeqSBAIJ   *aa=(Mat_SeqSBAIJ*)A->data;

  PetscFunctionBegin;
  *v = aa->a;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz;
    ai   = aa->i;
    aj   = aa->j;
    *nnz = nz;
    ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr);
    col  = row + nz;

    nz = 0;
    for (i=0; i<M; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        row[nz] = i+shift; col[nz++] = ajj[j] + shift;
      }
    }
    *r = row; *c = col;
  }
  PetscFunctionReturn(0);
}
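/*
   The seqaij->sbaij converter below feeds MUMPS' symmetric (sym>0) input, which
   wants only one triple per off-diagonal pair. Illustration: for
     [ 2 1 ]
     [ 1 2 ]
   only the entries (1,1), (1,2) and (2,2) are emitted; the strictly lower part
   is implied by symmetry. Rows whose diagonal entry is structurally missing are
   scanned entry-by-entry, hence the MatMissingDiagonal_SeqAIJ() branch.
*/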
PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscInt    *ai,*aj,*ajj,*adiag,M=A->rmap->n;
  PetscInt          nz,rnz,i,j;
  const PetscScalar *av,*v1;
  PetscScalar       *val;
  PetscErrorCode    ierr;
  PetscInt          *row,*col;
  Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
  PetscBool         missing;

  PetscFunctionBegin;
  ai    = aa->i; aj = aa->j; av = aa->a;
  adiag = aa->diag;
  ierr  = MatMissingDiagonal_SeqAIJ(A,&missing,&i);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    /* count nz in the upper triangular part of A */
    nz = 0;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i]; j<ai[i+1]; j++) {
            if (aj[j] < i) continue;
            nz++;
          }
        } else {
          nz += ai[i+1] - adiag[i];
        }
      }
    } else {
      for (i=0; i<M; i++) nz += ai[i+1] - adiag[i];
    }
    *nnz = nz;

    ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr);
    col  = row + nz;
    val  = (PetscScalar*)(col + nz);

    nz = 0;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i]; j<ai[i+1]; j++) {
            if (aj[j] < i) continue;
            row[nz] = i+shift;
            col[nz] = aj[j]+shift;
            val[nz] = av[j];
            nz++;
          }
        } else {
          rnz = ai[i+1] - adiag[i];
          ajj = aj + adiag[i];
          v1  = av + adiag[i];
          for (j=0; j<rnz; j++) {
            row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i=0; i<M; i++) {
        rnz = ai[i+1] - adiag[i];
        ajj = aj + adiag[i];
        v1  = av + adiag[i];
        for (j=0; j<rnz; j++) {
          row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j];
        }
      }
    }
    *r = row; *c = col; *v = val;
  } else {
    nz = 0; val = *v;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i]; j<ai[i+1]; j++) {
            if (aj[j] < i) continue;
            val[nz++] = av[j];
          }
        } else {
          rnz = ai[i+1] - adiag[i];
          v1  = av + adiag[i];
          for (j=0; j<rnz; j++) {
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i=0; i<M; i++) {
        rnz = ai[i+1] - adiag[i];
        v1  = av + adiag[i];
        for (j=0; j<rnz; j++) {
          val[nz++] = v1[j];
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
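/*
   The MPI converters below walk the two blocks of PETSc's parallel layout: the
   "A-part" (diagonal block, columns owned by this rank, local column indices
   offset by rstart) and the "B-part" (off-diagonal block, whose compressed
   local columns are mapped back to global indices through garray).
   Illustration: on a rank owning rows/cols 2..3 of a 6x6 matrix, an A-part
   entry at local (i=0,j=1) becomes the global triple (2,3), while a B-part
   entry with bjj[j]=0 and garray={4} becomes (2,4).
*/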
PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt          rstart,nz,i,j,jj,irow,countA,countB;
  PetscInt          *row,*col;
  const PetscScalar *av, *bv,*v1,*v2;
  PetscScalar       *val;
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ*)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;

  PetscFunctionBegin;
  ai = aa->i; aj = aa->j; bi = bb->i; bj = bb->j; rstart = A->rmap->rstart;
  av = aa->a; bv = bb->a;

  garray = mat->garray;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz + bb->nz;
    *nnz = nz;
    ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr);
    col  = row + nz;
    val  = (PetscScalar*)(col + nz);

    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j=0; j<countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt          rstart,nz,i,j,jj,irow,countA,countB;
  PetscInt          *row,*col;
  const PetscScalar *av, *bv,*v1,*v2;
  PetscScalar       *val;
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ        *aa  = (Mat_SeqAIJ*)(mat->A)->data;
  Mat_SeqAIJ        *bb  = (Mat_SeqAIJ*)(mat->B)->data;

  PetscFunctionBegin;
  ai = aa->i; aj = aa->j; bi = bb->i; bj = bb->j; rstart = A->rmap->rstart;
  av = aa->a; bv = bb->a;

  garray = mat->garray;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz + bb->nz;
    *nnz = nz;
    ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr);
    col  = row + nz;
    val  = (PetscScalar*)(col + nz);

    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j=0; j<countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  PetscFunctionReturn(0);
}
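/*
   The BAIJ converter below expands each bs x bs block into bs2 scalar triples.
   Illustration with bs=2: a block at block-row i, block-column ajj[k], stored
   column-major as {v00,v10,v01,v11}, yields the four triples
     (2i,   2*ajj[k],   v00), (2i+1, 2*ajj[k],   v10),
     (2i,   2*ajj[k]+1, v01), (2i+1, 2*ajj[k]+1, v11)
   (plus shift), matching the j-outer/n-inner loops over val.
*/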
PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ*)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
  const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj;
  const PetscInt    *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart;
  const PetscInt    bs2=mat->bs2;
  PetscErrorCode    ierr;
  PetscInt          bs,nz,i,j,k,n,jj,irow,countA,countB,idx;
  PetscInt          *row,*col;
  const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2;
  PetscScalar       *val;

  PetscFunctionBegin;
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = bs2*(aa->nz + bb->nz);
    *nnz = nz;
    ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr);
    col  = row + nz;
    val  = (PetscScalar*)(col + nz);

    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }

  jj = 0; irow = rstart;
  for (i=0; i<mbs; i++) {
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    ajj    = aj + ai[i];
    bjj    = bj + bi[i];
    v1     = av + bs2*ai[i];
    v2     = bv + bs2*bi[i];

    idx = 0;
    /* A-part */
    for (k=0; k<countA; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            row[jj] = irow + n + shift;
            col[jj] = rstart + bs*ajj[k] + j + shift;
          }
          val[jj++] = v1[idx++];
        }
      }
    }

    idx = 0;
    /* B-part */
    for (k=0; k<countB; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            row[jj] = irow + n + shift;
            col[jj] = bs*garray[bjj[k]] + j + shift;
          }
          val[jj++] = v2[idx++];
        }
      }
    }
    irow += bs;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscInt    *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt          rstart,nz,nza,nzb,i,j,jj,irow,countA,countB;
  PetscInt          *row,*col;
  const PetscScalar *av, *bv,*v1,*v2;
  PetscScalar       *val;
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ        *aa  = (Mat_SeqAIJ*)(mat->A)->data;
  Mat_SeqAIJ        *bb  = (Mat_SeqAIJ*)(mat->B)->data;

  PetscFunctionBegin;
  ai = aa->i; aj = aa->j; adiag = aa->diag;
  bi = bb->i; bj = bb->j; garray = mat->garray;
  av = aa->a; bv = bb->a;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
    nzb = 0; /* num of upper triangular entries in mat->B */
    for (i=0; i<m; i++) {
      nza   += (ai[i+1] - adiag[i]);
      countB = bi[i+1] - bi[i];
      bjj    = bj + bi[i];
      for (j=0; j<countB; j++) {
        if (garray[bjj[j]] > rstart) nzb++;
      }
    }

    nz   = nza + nzb; /* total nz of upper triangular part of mat */
    *nnz = nz;
    ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr);
    col  = row + nz;
    val  = (PetscScalar*)(col + nz);

    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
    v1     = av + adiag[i];
    countA = ai[i+1] - adiag[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j=0; j<countB; j++) {
      if (garray[bjj[j]] > rstart) { /* keep only the upper triangular part of the off-diagonal block */
        if (reuse == MAT_INITIAL_MATRIX) {
          row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
        }
        val[jj++] = v2[j];
      }
    }
    irow++;
  }
  PetscFunctionReturn(0);
}
PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
  ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr);
  ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr);
  ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr);
  ierr = PetscFree(mumps->irn);CHKERRQ(ierr);
  ierr = PetscFree(mumps->info);CHKERRQ(ierr);
  ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr);
  mumps->id.job = JOB_END;
  PetscMUMPS_c(mumps);
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  if (mumps->use_petsc_omp_support) { ierr = PetscOmpCtrlDestroy(&mumps->omp_ctrl);CHKERRQ(ierr); }
#endif
  ierr = PetscFree2(mumps->recvcount,mumps->displs);CHKERRQ(ierr);
  ierr = PetscFree(A->data);CHKERRQ(ierr);

  /* clear composed functions */
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS        *mumps=(Mat_MUMPS*)A->data;
  PetscScalar      *array;
  Vec              b_seq;
  IS               is_iden,is_petsc;
  PetscErrorCode   ierr;
  PetscInt         i;
  PetscBool        second_solve = PETSC_FALSE;
  static PetscBool cite1 = PETSC_FALSE,cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",&cite1);CHKERRQ(ierr);
  ierr = PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",&cite2);CHKERRQ(ierr);

  if (A->factorerrortype) {
    ierr = PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    ierr = VecSetInf(x);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  mumps->id.ICNTL(20) = 0; /* dense RHS */
  mumps->id.nrhs      = 1;
  b_seq               = mumps->b_seq;
  if (mumps->petsc_size > 1) {
    /* MUMPS only supports centralized rhs. Scatter b into a sequential rhs vector */
    ierr = VecScatterBegin(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mumps->myid) {ierr = VecGetArray(b_seq,&array);CHKERRQ(ierr);}
  } else { /* petsc_size == 1 */
    ierr = VecCopy(b,x);CHKERRQ(ierr);
    ierr = VecGetArray(x,&array);CHKERRQ(ierr);
  }
  if (!mumps->myid) { /* define rhs on the host */
    mumps->id.nrhs = 1;
    mumps->id.rhs  = (MumpsScalar*)array;
  }

  /*
     handle condensation step of Schur complement (if any)
     We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
     According to the MUMPS (5.0.0) manual, any value should be harmless during the factorization phase.
     Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
     This requires an extra call to PetscMUMPS_c and the computation of the factors for S.
  */
  if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
    if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n");
    second_solve = PETSC_TRUE;
    ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
  }
  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

  /* handle expansion step of Schur complement (if any) */
  if (second_solve) {
    ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
  }

  if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
    if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
      /* when id.ICNTL(9) changes, the contents of isol_loc may change (not its size, lsol_loc), so recreate scat_sol */
      ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
    }
    if (!mumps->scat_sol) { /* create scatter scat_sol */
      ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */
      for (i=0; i<mumps->id.lsol_loc; i++) {
        mumps->id.isol_loc[i] -= 1; /* change Fortran style to C style */
      }
      ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,mumps->id.isol_loc,PETSC_COPY_VALUES,&is_petsc);CHKERRQ(ierr); /* to */
      ierr = VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr);
      ierr = ISDestroy(&is_iden);CHKERRQ(ierr);
      ierr = ISDestroy(&is_petsc);CHKERRQ(ierr);

      mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
    }

    ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }

  if (mumps->petsc_size > 1) {if (!mumps->myid) {ierr = VecRestoreArray(b_seq,&array);CHKERRQ(ierr);}}
  else {ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);}

  ierr = PetscLogFlops(2.0*mumps->id.RINFO(3));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mumps->id.ICNTL(9) = 0; /* solve with A^T */
  ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr);
  mumps->id.ICNTL(9) = 1; /* restore the default: solve with A */
  PetscFunctionReturn(0);
}
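/*
   MatMatSolve_MUMPS below handles many right-hand sides at once. A hypothetical
   caller-side sketch, after MatGetFactor()/MatLUFactorSymbolic()/MatLUFactorNumeric()
   have produced the factor F for an M x M matrix (names comm, M, nrhs are assumed):

     Mat B,X;
     MatCreateDense(comm,PETSC_DECIDE,PETSC_DECIDE,M,nrhs,NULL,&B);
     MatCreateDense(comm,PETSC_DECIDE,PETSC_DECIDE,M,nrhs,NULL,&X);
     ... fill B ...
     MatMatSolve(F,B,X);   dispatches here for MATSOLVERMUMPS
*/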
PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X)
{
  PetscErrorCode ierr;
  Mat            Bt = NULL;
  PetscBool      flg, flgT;
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
  PetscInt       i,nrhs,M;
  PetscScalar    *array,*bray;
  PetscInt       lsol_loc,nlsol_loc,*isol_loc,*idxx,*isol_loc_save,iidx = 0;
  MumpsScalar    *sol_loc,*sol_loc_save;
  IS             is_to,is_from;
  PetscInt       k,proc,j,m,myrstart;
  const PetscInt *rstart;
  Vec            v_mpi,b_seq,msol_loc;
  VecScatter     scat_rhs,scat_sol;
  PetscScalar    *aa;
  PetscInt       spnr,*ia,*ja;
  Mat_MPIAIJ     *b = NULL;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");

  ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (flg) { /* dense B */
    if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution");
    mumps->id.ICNTL(20) = 0; /* dense RHS */
  } else { /* sparse B */
    ierr = PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);CHKERRQ(ierr);
    if (flgT) { /* input B is the transpose of the actual RHS matrix,
                   because MUMPS requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
      ierr = MatTransposeGetMat(B,&Bt);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix");
    mumps->id.ICNTL(20) = 1; /* sparse RHS */
  }

  ierr = MatGetSize(B,&M,&nrhs);CHKERRQ(ierr);
  mumps->id.nrhs = nrhs;
  mumps->id.lrhs = M;
  mumps->id.rhs  = NULL;

  if (mumps->petsc_size == 1) {
    PetscScalar *aa;
    PetscInt    spnr,*ia,*ja;
    PetscBool   second_solve = PETSC_FALSE;

    ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
    mumps->id.rhs = (MumpsScalar*)array;

    if (!Bt) { /* dense B */
      /* copy B to X */
      ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr);
      ierr = PetscMemcpy(array,bray,M*nrhs*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr);
    } else { /* sparse B */
      ierr = MatSeqAIJGetArray(Bt,&aa);CHKERRQ(ierr);
      ierr = MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      /* mumps requires ia and ja to start at 1! */
      mumps->id.irhs_ptr    = ia;
      mumps->id.irhs_sparse = ja;
      mumps->id.nz_rhs      = ia[spnr] - 1;
      mumps->id.rhs_sparse  = (MumpsScalar*)aa;
    }
    /* handle condensation step of Schur complement (if any) */
    if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
      second_solve = PETSC_TRUE;
      ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
    }
    /* solve phase */
    /*-------------*/
    mumps->id.job = JOB_SOLVE;
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

    /* handle expansion step of Schur complement (if any) */
    if (second_solve) {
      ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
    }
    if (Bt) { /* sparse B */
      ierr = MatSeqAIJRestoreArray(Bt,&aa);CHKERRQ(ierr);
      ierr = MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
    ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/
  if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n");

  /* create msol_loc to hold mumps local solution */
  isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
  sol_loc_save  = mumps->id.sol_loc;

  lsol_loc  = mumps->id.lsol_loc;
  nlsol_loc = nrhs*lsol_loc; /* length of sol_loc */
  ierr = PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);CHKERRQ(ierr);
  mumps->id.sol_loc  = (MumpsScalar*)sol_loc;
  mumps->id.isol_loc = isol_loc;

  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);CHKERRQ(ierr);

  if (!Bt) { /* dense B */
    /* scatter v_mpi to b_seq because MUMPS only supports centralized rhs */
    /* wrap dense rhs matrix B into a vector v_mpi */
    ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr);
    ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr);
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr);

    /* scatter v_mpi to b_seq on proc[0]. MUMPS requires rhs to be centralized on the host! */
    if (!mumps->myid) {
      PetscInt *idx;
      /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
      ierr = PetscMalloc1(nrhs*M,&idx);CHKERRQ(ierr);
      ierr = MatGetOwnershipRanges(B,&rstart);CHKERRQ(ierr);
      k    = 0;
      for (proc=0; proc<mumps->petsc_size; proc++){
        for (j=0; j<nrhs; j++){
          for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i;
        }
      }

      ierr = VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);CHKERRQ(ierr);
      ierr = ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);CHKERRQ(ierr);
    } else {
      ierr = VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);CHKERRQ(ierr);
    }
    ierr = VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);CHKERRQ(ierr);
    ierr = VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = ISDestroy(&is_to);CHKERRQ(ierr);
    ierr = ISDestroy(&is_from);CHKERRQ(ierr);
    ierr = VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

    if (!mumps->myid) { /* define rhs on the host */
      ierr = VecGetArray(b_seq,&bray);CHKERRQ(ierr);
      mumps->id.rhs = (MumpsScalar*)bray;
      ierr = VecRestoreArray(b_seq,&bray);CHKERRQ(ierr);
    }

  } else { /* sparse B */
    b = (Mat_MPIAIJ*)Bt->data;

    /* wrap dense X into a vector v_mpi */
    ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr);
    ierr = MatDenseGetArray(X,&bray);CHKERRQ(ierr);
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(X,&bray);CHKERRQ(ierr);

    if (!mumps->myid) {
      ierr = MatSeqAIJGetArray(b->A,&aa);CHKERRQ(ierr);
      ierr = MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      /* mumps requires ia and ja to start at 1! */
      mumps->id.irhs_ptr    = ia;
      mumps->id.irhs_sparse = ja;
      mumps->id.nz_rhs      = ia[spnr] - 1;
      mumps->id.rhs_sparse  = (MumpsScalar*)aa;
    } else {
      mumps->id.irhs_ptr    = NULL;
      mumps->id.irhs_sparse = NULL;
      mumps->id.nz_rhs      = 0;
      mumps->id.rhs_sparse  = NULL;
    }
  }

  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

  /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
  ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
  ierr = VecPlaceArray(v_mpi,array);CHKERRQ(ierr);

  /* create scatter scat_sol */
  ierr = MatGetOwnershipRanges(X,&rstart);CHKERRQ(ierr);
  /* iidx: index for scattering the mumps solution to petsc X */

  ierr = ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);CHKERRQ(ierr);
  ierr = PetscMalloc1(nlsol_loc,&idxx);CHKERRQ(ierr);
  for (i=0; i<lsol_loc; i++) {
    isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in the j-th vector */

    for (proc=0; proc<mumps->petsc_size; proc++){
      if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) {
        myrstart = rstart[proc];
        k        = isol_loc[i] - myrstart;        /* local index on 1st column of petsc vector X */
        iidx     = k + myrstart*nrhs;             /* maps mumps isol_loc[i] to petsc index in X */
        m        = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */
        break;
      }
    }

    for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m;
  }
  ierr = ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr);
  ierr = VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);CHKERRQ(ierr);
  ierr = VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = ISDestroy(&is_from);CHKERRQ(ierr);
  ierr = ISDestroy(&is_to);CHKERRQ(ierr);
  ierr = VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);

  /* free spaces */
  mumps->id.sol_loc  = sol_loc_save;
  mumps->id.isol_loc = isol_loc_save;

  ierr = PetscFree2(sol_loc,isol_loc);CHKERRQ(ierr);
  ierr = PetscFree(idxx);CHKERRQ(ierr);
  ierr = VecDestroy(&msol_loc);CHKERRQ(ierr);
  ierr = VecDestroy(&v_mpi);CHKERRQ(ierr);
  if (Bt) {
    if (!mumps->myid) {
      b = (Mat_MPIAIJ*)Bt->data;
      ierr = MatSeqAIJRestoreArray(b->A,&aa);CHKERRQ(ierr);
      ierr = MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
  } else {
    ierr = VecDestroy(&b_seq);CHKERRQ(ierr);
    ierr = VecScatterDestroy(&scat_rhs);CHKERRQ(ierr);
  }
  ierr = VecScatterDestroy(&scat_sol);CHKERRQ(ierr);
  ierr = PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
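/*
   MUMPS wants sparse right-hand sides in compressed sparse COLUMN format, while
   PETSc AIJ matrices are stored by rows. MatMatTransposeSolve_MUMPS therefore
   takes Bt, the transpose of the RHS matrix: the CSR arrays of Bt read directly
   as CSC arrays of B. Caller-side sketch (names F, Bt, X are assumptions):

     Mat Bt;                      nrhs x M AIJ matrix; row j holds sparse rhs column j
     MatMatTransposeSolve(F,Bt,X);
*/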
PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X)
{
  PetscErrorCode ierr;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix");

  /* Create B=Bt^T that uses Bt's data structure */
  ierr = MatCreateTranspose(Bt,&B);CHKERRQ(ierr);

  ierr = MatMatSolve_MUMPS(A,B,X);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if !defined(PETSC_USE_COMPLEX)
/*
  input:
   F:     numeric factor
  output:
   nneg:  total number of negative pivots
   nzero: total number of zero pivots
   npos:  (global dimension of F) - nneg - nzero
*/
PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)F->data;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRQ(ierr);
  /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
  if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",mumps->id.ICNTL(13));

  if (nneg) *nneg = mumps->id.INFOG(12);
  if (nzero || npos) {
    if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
    if (nzero) *nzero = mumps->id.INFOG(28);
    if (npos)  *npos  = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
  }
  PetscFunctionReturn(0);
}
#endif
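/*
   Usage sketch for the inertia computation above: with a symmetric factor from
   MATSOLVERMUMPS, run with

     -mat_mumps_icntl_13 1 -mat_mumps_icntl_24 1

   so that ScaLAPACK is bypassed on the root node and null pivots are detected,
   then call MatGetInertia() on the factored matrix.
*/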
PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps)
{
  PetscErrorCode ierr;
  PetscInt       i,nz=0,*irn,*jcn=0;
  PetscScalar    *val=0;
  PetscMPIInt    mpinz,*recvcount=NULL,*displs=NULL;

  PetscFunctionBegin;
  if (mumps->omp_comm_size > 1) {
    if (reuse == MAT_INITIAL_MATRIX) {
      /* master first gathers counts of nonzeros to receive */
      if (mumps->is_omp_master) { ierr = PetscMalloc2(mumps->omp_comm_size,&recvcount,mumps->omp_comm_size,&displs);CHKERRQ(ierr); }
      ierr = PetscMPIIntCast(mumps->nz,&mpinz);CHKERRQ(ierr);
      ierr = MPI_Gather(&mpinz,1,MPI_INT,recvcount,1,MPI_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr);

      /* master allocates memory to receive nonzeros */
      if (mumps->is_omp_master) {
        displs[0] = 0;
        for (i=1; i<mumps->omp_comm_size; i++) displs[i] = displs[i-1] + recvcount[i-1];
        nz   = displs[mumps->omp_comm_size-1] + recvcount[mumps->omp_comm_size-1];
        ierr = PetscMalloc(2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar),&irn);CHKERRQ(ierr);
        jcn  = irn + nz;
        val  = (PetscScalar*)(jcn + nz);
      }

      /* save the gatherv plan */
      mumps->mpinz     = mpinz; /* used as send count */
      mumps->recvcount = recvcount;
      mumps->displs    = displs;

      /* master gathers nonzeros */
      ierr = MPI_Gatherv(mumps->irn,mpinz,MPIU_INT,irn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr);
      ierr = MPI_Gatherv(mumps->jcn,mpinz,MPIU_INT,jcn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr);
      ierr = MPI_Gatherv(mumps->val,mpinz,MPIU_SCALAR,val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr);

      /* master frees its row/col/val and replaces them with the bigger arrays */
      if (mumps->is_omp_master) {
        ierr = PetscFree(mumps->irn);CHKERRQ(ierr); /* irn/jcn/val are allocated together, so free only irn */
        mumps->nz  = nz; /* it is the sum of mpinz over omp_comm */
        mumps->irn = irn;
        mumps->jcn = jcn;
        mumps->val = val;
      }
    } else {
      ierr = MPI_Gatherv((mumps->is_omp_master?MPI_IN_PLACE:mumps->val),mumps->mpinz,MPIU_SCALAR,mumps->val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}
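/*
   Example of the gatherv plan built above: on a 3-rank omp_comm with local
   nonzero counts recvcount = {3,2,4}, the master computes displs = {0,3,5}
   and nz = 9, so the concatenated irn/jcn/val arrays hold rank 0's triples
   first, then rank 1's, then rank 2's.
*/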
PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)(F)->data;
  PetscErrorCode ierr;
  PetscBool      isMPIAIJ;

  PetscFunctionBegin;
  if (mumps->id.INFOG(1) < 0) {
    if (mumps->id.INFOG(1) == -6) {
      ierr = PetscInfo2(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    }
    ierr = PetscInfo2(A,"MatFactorNumeric is called after the analysis phase failed, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr);
  ierr = MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);CHKERRQ(ierr);

  /* numerical factorization phase */
  /*-------------------------------*/
  mumps->id.job = JOB_FACTNUMERIC;
  if (!mumps->id.ICNTL(18)) { /* A is centralized */
    if (!mumps->myid) {
      mumps->id.a = (MumpsScalar*)mumps->val;
    }
  } else {
    mumps->id.a_loc = (MumpsScalar*)mumps->val;
  }
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) {
    if (A->erroriffailure) {
      SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
    } else {
      if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
        ierr = PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
      } else if (mumps->id.INFOG(1) == -13) {
        ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_OUTMEMORY;
      } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
        ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with work array\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_OUTMEMORY;
      } else {
        ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_OTHER;
      }
    }
  }
  if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d\n",mumps->id.ICNTL(16));

  F->assembled    = PETSC_TRUE;
  mumps->matstruc = SAME_NONZERO_PATTERN;
  if (F->schur) { /* reset Schur status to unfactored */
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
      mumps->id.ICNTL(19) = 2;
      ierr = MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);CHKERRQ(ierr);
    }
    ierr = MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);CHKERRQ(ierr);
  }

  /* just to be sure that the ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
  if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;

  if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
  if (mumps->petsc_size > 1) {
    PetscInt    lsol_loc;
    PetscScalar *sol_loc;

    ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr);

    /* distributed solution; create x_seq=sol_loc for repeated use */
    if (mumps->x_seq) {
      ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
      ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr);
      ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr);
    }
    lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
    ierr = PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);CHKERRQ(ierr);
    mumps->id.lsol_loc = lsol_loc;
    mumps->id.sol_loc  = (MumpsScalar*)sol_loc;
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr);
  }
  ierr = PetscLogFlops(mumps->id.RINFO(2));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
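/*
   The options registered below are the usual way to steer MUMPS from the
   command line. An illustrative invocation when solving through KSP (exact
   values are only examples):

     -pc_type lu -pc_factor_mat_solver_type mumps \
       -mat_mumps_icntl_14 50   (50% workspace increase)  \
       -mat_mumps_icntl_24 1    (detect null pivot rows)
*/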
/* Sets MUMPS options from the options database */
PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A)
{
  Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
  PetscErrorCode ierr;
  PetscInt       icntl,info[40],i,ninfo=40;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr);
  if (flg) mumps->id.ICNTL(1) = icntl;
  ierr = PetscOptionsInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warnings","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr);
  if (flg) mumps->id.ICNTL(2) = icntl;
  ierr = PetscOptionsInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr);
  if (flg) mumps->id.ICNTL(3) = icntl;

  ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr);
  if (flg) mumps->id.ICNTL(4) = icntl;
  if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */

  ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr);
  if (flg) mumps->id.ICNTL(6) = icntl;

  ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr);
  if (flg) {
    if (icntl == 1 && mumps->petsc_size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"pivot order set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n");
    else mumps->id.ICNTL(7) = icntl;
  }

  ierr = PetscOptionsInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr);
  /* ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL);CHKERRQ(ierr);  handled by MatSolveTranspose_MUMPS() */
  ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr);
  if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any Schur data (if any) */
    ierr = MatDestroy(&F->schur);CHKERRQ(ierr);
    ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr);
  }
  /* ierr = PetscOptionsInt("-mat_mumps_icntl_20","ICNTL(20): the format (dense or sparse) of the right-hand sides","None",mumps->id.ICNTL(20),&mumps->id.ICNTL(20),NULL);CHKERRQ(ierr);  -- sparse rhs is not supported in PETSc API */
  /* ierr = PetscOptionsInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL);CHKERRQ(ierr);  we only use distributed solution vectors */

  ierr = PetscOptionsInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr);
  if (mumps->id.ICNTL(24)) {
    mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */
  }

  ierr = PetscOptionsInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_27","ICNTL(27): the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr);
  /* ierr = PetscOptionsInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */
  ierr = PetscOptionsInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr);
  /* ierr = PetscOptionsInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr);  -- not supported by PETSc API */
  ierr = PetscOptionsInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low-Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr);

  ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);CHKERRQ(ierr);

  ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir","out of core directory","None",mumps->id.ooc_tmpdir,mumps->id.ooc_tmpdir,256,NULL);CHKERRQ(ierr);

  ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr);
  if (ninfo) {
    if (ninfo > 40) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO entries %d must be <= 40\n",ninfo);
    ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr);
    mumps->ninfo = ninfo;
    for (i=0; i<ninfo; i++) {
      if (info[i] < 0 || info[i] > 40) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %d must be between 1 and 40\n",info[i]);
      else mumps->info[i] = info[i];
    }
  }

  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
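/*
   PetscInitializeMUMPS below also wires up the optional hybrid MPI/OpenMP mode.
   Illustrative run (requires PETSc configured with OpenMP support, e.g.
   --with-openmp --download-hwloc; counts are only an example):

     mpiexec -n 16 ./app -mat_mumps_use_omp_threads 4

   groups the 16 MPI ranks so that only 4 "master" ranks call MUMPS, each
   driving 4 OpenMP threads.
*/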
PetscFunctionBegin; 1379 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1380 ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRQ(ierr); 1381 ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRQ(ierr); /* so that code like "if (!myid)" still works even if mumps_comm is different */ 1382 1383 ierr = PetscOptionsHasName(NULL,NULL,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr); 1384 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when the user did not supply one */ 1385 ierr = PetscOptionsGetInt(NULL,NULL,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr); 1386 if (mumps->use_petsc_omp_support) { 1387 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1388 ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr); 1389 ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr); 1390 #else 1391 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -mat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in the MATSOLVERMUMPS manual\n"); 1392 #endif 1393 } else { 1394 mumps->omp_comm = PETSC_COMM_SELF; 1395 mumps->mumps_comm = mumps->petsc_comm; 1396 mumps->is_omp_master = PETSC_TRUE; 1397 } 1398 ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRQ(ierr); 1399 1400 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 1401 mumps->id.job = JOB_INIT; 1402 mumps->id.par = 1; /* host participates in factorization and solve */ 1403 mumps->id.sym = mumps->sym; 1404 1405 PetscMUMPS_c(mumps); 1406 1407 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 1408 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
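The two broadcasts below replicate the 40 ICNTL and 15 CNTL entries from each omp_comm master to its slaves.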
1409 */ 1410 ierr = MPI_Bcast(mumps->id.icntl,40,MPIU_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 Manual Section 9 */ 1411 ierr = MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); 1412 1413 mumps->scat_rhs = NULL; 1414 mumps->scat_sol = NULL; 1415 1416 /* set PETSc-MUMPS default options - override MUMPS default */ 1417 mumps->id.ICNTL(3) = 0; 1418 mumps->id.ICNTL(4) = 0; 1419 if (mumps->petsc_size == 1) { 1420 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 1421 } else { 1422 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 1423 mumps->id.ICNTL(20) = 0; /* rhs is in dense format */ 1424 mumps->id.ICNTL(21) = 1; /* distributed solution */ 1425 } 1426 1427 /* schur */ 1428 mumps->id.size_schur = 0; 1429 mumps->id.listvar_schur = NULL; 1430 mumps->id.schur = NULL; 1431 mumps->sizeredrhs = 0; 1432 mumps->schur_sol = NULL; 1433 mumps->schur_sizesol = 0; 1434 PetscFunctionReturn(0); 1435 } 1436 1437 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps) 1438 { 1439 PetscErrorCode ierr; 1440 1441 PetscFunctionBegin; 1442 ierr = MPI_Bcast(mumps->id.infog, 40,MPIU_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 manual p82 */ 1443 ierr = MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); 1444 if (mumps->id.INFOG(1) < 0) { 1445 if (A->erroriffailure) { 1446 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1447 } else { 1448 if (mumps->id.INFOG(1) == -6) { 1449 ierr = PetscInfo2(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1450 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 1451 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 1452 ierr = PetscInfo2(F,"workspace problem, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1453 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1454 } else { 1455 ierr = PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1456 F->factorerrortype = MAT_FACTOR_OTHER; 1457 } 1458 } 1459 } 1460 PetscFunctionReturn(0); 1461 } 1462 1463 /* Note the Petsc r(=c) permutation is used when mumps->id.ICNTL(7)==1 with centralized assembled matrix input; otherwise r and c are ignored */ 1464 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1465 { 1466 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1467 PetscErrorCode ierr; 1468 Vec b; 1469 IS is_iden; 1470 const PetscInt M = A->rmap->N; 1471 1472 PetscFunctionBegin; 1473 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1474 1475 /* Set MUMPS options from the options database */ 1476 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1477 1478 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1479 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1480 1481 /* analysis phase */ 1482 /*----------------*/ 1483 mumps->id.job = JOB_FACTSYMBOLIC; 1484 mumps->id.n = M; 1485 switch (mumps->id.ICNTL(18)) { 1486 case 0: /* centralized assembled matrix input */ 1487 if (!mumps->myid) { 1488 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1489 if (mumps->id.ICNTL(6)>1) { 1490 mumps->id.a = (MumpsScalar*)mumps->val; 1491 } 1492 if (mumps->id.ICNTL(7) == 1) { /* use
user-provided matrix ordering - assuming r = c ordering */ 1493 /* 1494 PetscBool flag; 1495 ierr = ISEqual(r,c,&flag);CHKERRQ(ierr); 1496 if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"row_perm != col_perm"); 1497 ierr = ISView(r,PETSC_VIEWER_STDOUT_SELF); 1498 */ 1499 if (!mumps->myid) { 1500 const PetscInt *idx; 1501 PetscInt i,*perm_in; 1502 1503 ierr = PetscMalloc1(M,&perm_in);CHKERRQ(ierr); 1504 ierr = ISGetIndices(r,&idx);CHKERRQ(ierr); 1505 1506 mumps->id.perm_in = perm_in; 1507 for (i=0; i<M; i++) perm_in[i] = idx[i]+1; /* perm_in[]: starts from 1, not 0! */ 1508 ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr); 1509 } 1510 } 1511 } 1512 break; 1513 case 3: /* distributed assembled matrix input (size>1) */ 1514 mumps->id.nz_loc = mumps->nz; 1515 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1516 if (mumps->id.ICNTL(6)>1) { 1517 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1518 } 1519 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1520 if (!mumps->myid) { 1521 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->N,&mumps->b_seq);CHKERRQ(ierr); 1522 ierr = ISCreateStride(PETSC_COMM_SELF,A->rmap->N,0,1,&is_iden);CHKERRQ(ierr); 1523 } else { 1524 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&mumps->b_seq);CHKERRQ(ierr); 1525 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_iden);CHKERRQ(ierr); 1526 } 1527 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1528 ierr = VecScatterCreate(b,is_iden,mumps->b_seq,is_iden,&mumps->scat_rhs);CHKERRQ(ierr); 1529 ierr = ISDestroy(&is_iden);CHKERRQ(ierr); 1530 ierr = VecDestroy(&b);CHKERRQ(ierr); 1531 break; 1532 } 1533 PetscMUMPS_c(mumps); 1534 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1535 1536 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1537 F->ops->solve = MatSolve_MUMPS; 1538 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1539 F->ops->matsolve = MatMatSolve_MUMPS; 1540 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1541 PetscFunctionReturn(0); 1542 } 1543 1544 /* Note the Petsc r and c permutations are ignored */ 1545 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1546 { 1547 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1548 PetscErrorCode ierr; 1549 Vec b; 1550 IS is_iden; 1551 const PetscInt M = A->rmap->N; 1552 1553 PetscFunctionBegin; 1554 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1555 1556 /* Set MUMPS options from the options database */ 1557 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1558 1559 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1560 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1561 1562 /* analysis phase */ 1563 /*----------------*/ 1564 mumps->id.job = JOB_FACTSYMBOLIC; 1565 mumps->id.n = M; 1566 switch (mumps->id.ICNTL(18)) { 1567 case 0: /* centralized assembled matrix input */ 1568 if (!mumps->myid) { 1569 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1570 if (mumps->id.ICNTL(6)>1) { 1571 mumps->id.a = (MumpsScalar*)mumps->val; 1572 } 1573 } 1574 break; 1575 case 3: /* distributed assembled matrix input (size>1) */ 1576 mumps->id.nz_loc = mumps->nz; 1577 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1578 if (mumps->id.ICNTL(6)>1) { 1579 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1580 } 1581 /* MUMPS only supports centralized rhs.
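(Each rank contributes its local entries; ranks other than 0 participate with a length-0 b_seq.)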
Create scatter scat_rhs for repeated use in MatSolve() */ 1582 if (!mumps->myid) { 1583 ierr = VecCreateSeq(PETSC_COMM_SELF,A->cmap->N,&mumps->b_seq);CHKERRQ(ierr); 1584 ierr = ISCreateStride(PETSC_COMM_SELF,A->cmap->N,0,1,&is_iden);CHKERRQ(ierr); 1585 } else { 1586 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&mumps->b_seq);CHKERRQ(ierr); 1587 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_iden);CHKERRQ(ierr); 1588 } 1589 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1590 ierr = VecScatterCreate(b,is_iden,mumps->b_seq,is_iden,&mumps->scat_rhs);CHKERRQ(ierr); 1591 ierr = ISDestroy(&is_iden);CHKERRQ(ierr); 1592 ierr = VecDestroy(&b);CHKERRQ(ierr); 1593 break; 1594 } 1595 PetscMUMPS_c(mumps); 1596 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1597 1598 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1599 F->ops->solve = MatSolve_MUMPS; 1600 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1601 PetscFunctionReturn(0); 1602 } 1603 1604 /* Note the Petsc r permutation and factor info are ignored */ 1605 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info) 1606 { 1607 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1608 PetscErrorCode ierr; 1609 Vec b; 1610 IS is_iden; 1611 const PetscInt M = A->rmap->N; 1612 1613 PetscFunctionBegin; 1614 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1615 1616 /* Set MUMPS options from the options database */ 1617 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1618 1619 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1620 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1621 1622 /* analysis phase */ 1623 /*----------------*/ 1624 mumps->id.job = JOB_FACTSYMBOLIC; 1625 mumps->id.n = M; 1626 switch (mumps->id.ICNTL(18)) { 1627 case 0: /* centralized assembled matrix input */ 1628 if (!mumps->myid) { 1629 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1630 if (mumps->id.ICNTL(6)>1) { 1631 mumps->id.a = (MumpsScalar*)mumps->val; 1632 } 1633 } 1634 break; 1635 case 3: /* distributed assembled matrix input (size>1) */ 1636 mumps->id.nz_loc = mumps->nz; 1637 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1638 if (mumps->id.ICNTL(6)>1) { 1639 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1640 } 1641 /* MUMPS only supports centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 1642 if (!mumps->myid) { 1643 ierr = VecCreateSeq(PETSC_COMM_SELF,A->cmap->N,&mumps->b_seq);CHKERRQ(ierr); 1644 ierr = ISCreateStride(PETSC_COMM_SELF,A->cmap->N,0,1,&is_iden);CHKERRQ(ierr); 1645 } else { 1646 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&mumps->b_seq);CHKERRQ(ierr); 1647 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_iden);CHKERRQ(ierr); 1648 } 1649 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1650 ierr = VecScatterCreate(b,is_iden,mumps->b_seq,is_iden,&mumps->scat_rhs);CHKERRQ(ierr); 1651 ierr = ISDestroy(&is_iden);CHKERRQ(ierr); 1652 ierr = VecDestroy(&b);CHKERRQ(ierr); 1653 break; 1654 } 1655 PetscMUMPS_c(mumps); 1656 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1657 1658 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 1659 F->ops->solve = MatSolve_MUMPS; 1660 F->ops->solvetranspose = MatSolve_MUMPS; 1661 F->ops->matsolve = MatMatSolve_MUMPS; 1662 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1663 #if defined(PETSC_USE_COMPLEX) 1664 F->ops->getinertia = NULL; 1665 #else 1666 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 1667 #endif 1668 PetscFunctionReturn(0); 1669 } 1670 1671 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) 1672 { 1673 PetscErrorCode ierr; 1674 PetscBool iascii; 1675 PetscViewerFormat format; 1676 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1677 1678 PetscFunctionBegin; 1679 /* check if matrix is mumps type */ 1680 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0); 1681 1682 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1683 if (iascii) { 1684 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1685 if (format == PETSC_VIEWER_ASCII_INFO) { 1686 ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr); 1687 ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",mumps->id.sym);CHKERRQ(ierr); 1688 ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",mumps->id.par);CHKERRQ(ierr); 1689 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d \n",mumps->id.ICNTL(1));CHKERRQ(ierr); 1690 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d \n",mumps->id.ICNTL(2));CHKERRQ(ierr); 1691 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d \n",mumps->id.ICNTL(3));CHKERRQ(ierr); 1692 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",mumps->id.ICNTL(4));CHKERRQ(ierr); 1693 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",mumps->id.ICNTL(5));CHKERRQ(ierr); 1694 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",mumps->id.ICNTL(6));CHKERRQ(ierr); 1695 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d \n",mumps->id.ICNTL(7));CHKERRQ(ierr); 1696 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d \n",mumps->id.ICNTL(8));CHKERRQ(ierr); 1697 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",mumps->id.ICNTL(10));CHKERRQ(ierr); 1698 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",mumps->id.ICNTL(11));CHKERRQ(ierr); 1699 if (mumps->id.ICNTL(11)>0) { 1700 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr); 1701 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr); 1702 ierr = 
PetscViewerASCIIPrintf(viewer," RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6));CHKERRQ(ierr); 1703 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr); 1704 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr); 1705 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr); 1706 } 1707 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",mumps->id.ICNTL(12));CHKERRQ(ierr); 1708 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (efficiency control): %d \n",mumps->id.ICNTL(13));CHKERRQ(ierr); 1709 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",mumps->id.ICNTL(14));CHKERRQ(ierr); 1710 /* ICNTL(15-17) not used */ 1711 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",mumps->id.ICNTL(18));CHKERRQ(ierr); 1712 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d \n",mumps->id.ICNTL(19));CHKERRQ(ierr); 1713 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(20) (rhs sparse pattern): %d \n",mumps->id.ICNTL(20));CHKERRQ(ierr); 1714 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d \n",mumps->id.ICNTL(21));CHKERRQ(ierr); 1715 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d \n",mumps->id.ICNTL(22));CHKERRQ(ierr); 1716 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory can be allocated locally):%d \n",mumps->id.ICNTL(23));CHKERRQ(ierr); 1717 1718 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d \n",mumps->id.ICNTL(24));CHKERRQ(ierr); 1719 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d \n",mumps->id.ICNTL(25));CHKERRQ(ierr); 1720 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for rhs or solution): %d \n",mumps->id.ICNTL(26));CHKERRQ(ierr); 1721 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(27) (experimental parameter): %d \n",mumps->id.ICNTL(27));CHKERRQ(ierr); 1722 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d \n",mumps->id.ICNTL(28));CHKERRQ(ierr); 1723 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d \n",mumps->id.ICNTL(29));CHKERRQ(ierr); 1724 1725 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d \n",mumps->id.ICNTL(30));CHKERRQ(ierr); 1726 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(31) (factors is discarded in the solve phase): %d \n",mumps->id.ICNTL(31));CHKERRQ(ierr); 1727 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d \n",mumps->id.ICNTL(33));CHKERRQ(ierr); 1728 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d \n",mumps->id.ICNTL(35));CHKERRQ(ierr); 1729 1730 ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1));CHKERRQ(ierr); 1731 ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr); 1732 ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3));CHKERRQ(ierr); 1733 ierr = PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4));CHKERRQ(ierr); 1734 ierr = 
PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5));CHKERRQ(ierr); 1735 ierr = PetscViewerASCIIPrintf(viewer," CNTL(7) (dropping parameter for BLR): %g \n",mumps->id.CNTL(7));CHKERRQ(ierr); 1736 1737 /* infomation local to each processor */ 1738 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr); 1739 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1740 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr); 1741 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1742 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr); 1743 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr); 1744 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1745 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr); 1746 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr); 1747 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1748 1749 ierr = PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr); 1750 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr); 1751 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1752 1753 ierr = PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr); 1754 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr); 1755 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1756 1757 ierr = PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr); 1758 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr); 1759 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1760 1761 if (mumps->ninfo && mumps->ninfo <= 40){ 1762 PetscInt i; 1763 for (i=0; i<mumps->ninfo; i++){ 1764 ierr = PetscViewerASCIIPrintf(viewer, " INFO(%d): \n",mumps->info[i]);CHKERRQ(ierr); 1765 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(mumps->info[i]));CHKERRQ(ierr); 1766 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1767 } 1768 } 1769 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1770 1771 if (!mumps->myid) { /* information from the host */ 1772 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr); 1773 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr); 1774 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr); 1775 ierr = PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr); 1776 1777 ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d 
\n",mumps->id.INFOG(3));CHKERRQ(ierr); 1778 ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(4));CHKERRQ(ierr); 1779 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",mumps->id.INFOG(5));CHKERRQ(ierr); 1780 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",mumps->id.INFOG(6));CHKERRQ(ierr); 1781 ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively use after analysis): %d \n",mumps->id.INFOG(7));CHKERRQ(ierr); 1782 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",mumps->id.INFOG(8));CHKERRQ(ierr); 1783 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",mumps->id.INFOG(9));CHKERRQ(ierr); 1784 ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d \n",mumps->id.INFOG(10));CHKERRQ(ierr); 1785 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d \n",mumps->id.INFOG(11));CHKERRQ(ierr); 1786 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",mumps->id.INFOG(12));CHKERRQ(ierr); 1787 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",mumps->id.INFOG(13));CHKERRQ(ierr); 1788 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",mumps->id.INFOG(14));CHKERRQ(ierr); 1789 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",mumps->id.INFOG(15));CHKERRQ(ierr); 1790 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",mumps->id.INFOG(16));CHKERRQ(ierr); 1791 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",mumps->id.INFOG(17));CHKERRQ(ierr); 1792 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",mumps->id.INFOG(18));CHKERRQ(ierr); 1793 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",mumps->id.INFOG(19));CHKERRQ(ierr); 1794 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",mumps->id.INFOG(20));CHKERRQ(ierr); 1795 ierr = PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",mumps->id.INFOG(21));CHKERRQ(ierr); 1796 ierr = PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",mumps->id.INFOG(22));CHKERRQ(ierr); 1797 ierr = PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",mumps->id.INFOG(23));CHKERRQ(ierr); 1798 ierr = PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",mumps->id.INFOG(24));CHKERRQ(ierr); 1799 ierr = 
PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",mumps->id.INFOG(25));CHKERRQ(ierr); 1800 ierr = PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));CHKERRQ(ierr); 1801 ierr = PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));CHKERRQ(ierr); 1802 ierr = PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));CHKERRQ(ierr); 1803 ierr = PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));CHKERRQ(ierr); 1804 ierr = PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));CHKERRQ(ierr); 1805 ierr = PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));CHKERRQ(ierr); 1806 } 1807 } 1808 } 1809 PetscFunctionReturn(0); 1810 } 1811 1812 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info) 1813 { 1814 Mat_MUMPS *mumps =(Mat_MUMPS*)A->data; 1815 1816 PetscFunctionBegin; 1817 info->block_size = 1.0; 1818 info->nz_allocated = mumps->id.INFOG(20); 1819 info->nz_used = mumps->id.INFOG(20); 1820 info->nz_unneeded = 0.0; 1821 info->assemblies = 0.0; 1822 info->mallocs = 0.0; 1823 info->memory = 0.0; 1824 info->fill_ratio_given = 0; 1825 info->fill_ratio_needed = 0; 1826 info->factor_mallocs = 0; 1827 PetscFunctionReturn(0); 1828 } 1829 1830 /* -------------------------------------------------------------------------------------------*/ 1831 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 1832 { 1833 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1834 const PetscInt *idxs; 1835 PetscInt size,i; 1836 PetscErrorCode ierr; 1837 1838 PetscFunctionBegin; 1839 ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr); 1840 if (mumps->petsc_size > 1) { 1841 PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */ 1842 1843 ls = mumps->myid ? (size ? 
PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 1844 ierr = MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);CHKERRQ(ierr); 1845 if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported by PETSc\n"); 1846 } 1847 if (mumps->id.size_schur != size) { 1848 ierr = PetscFree2(mumps->id.listvar_schur,mumps->id.schur);CHKERRQ(ierr); 1849 mumps->id.size_schur = size; 1850 mumps->id.schur_lld = size; 1851 ierr = PetscMalloc2(size,&mumps->id.listvar_schur,size*size,&mumps->id.schur);CHKERRQ(ierr); 1852 } 1853 1854 /* Schur complement matrix */ 1855 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.size_schur,(PetscScalar*)mumps->id.schur,&F->schur);CHKERRQ(ierr); 1856 if (mumps->sym == 1) { 1857 ierr = MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 1858 } 1859 1860 /* MUMPS expects Fortran style indices */ 1861 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 1862 ierr = PetscMemcpy(mumps->id.listvar_schur,idxs,size*sizeof(PetscInt));CHKERRQ(ierr); 1863 for (i=0;i<size;i++) mumps->id.listvar_schur[i]++; 1864 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 1865 if (mumps->petsc_size > 1) { 1866 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 1867 } else { 1868 if (F->factortype == MAT_FACTOR_LU) { 1869 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 1870 } else { 1871 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 1872 } 1873 } 1874 /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */ 1875 mumps->id.ICNTL(26) = -1; 1876 PetscFunctionReturn(0); 1877 } 1878 1879 /* -------------------------------------------------------------------------------------------*/ 1880 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S) 1881 { 1882 Mat St; 1883 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1884 PetscScalar *array; 1885 #if defined(PETSC_USE_COMPLEX) 1886 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 1887 #endif 1888 PetscErrorCode ierr; 1889 1890 PetscFunctionBegin; 1891 if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected!
You should call MatFactorSetSchurIS to enable it"); 1892 ierr = MatCreate(PETSC_COMM_SELF,&St);CHKERRQ(ierr); 1893 ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);CHKERRQ(ierr); 1894 ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr); 1895 ierr = MatSetUp(St);CHKERRQ(ierr); 1896 ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr); 1897 if (!mumps->sym) { /* MUMPS always returns a full matrix */ 1898 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1899 PetscInt i,j,N=mumps->id.size_schur; 1900 for (i=0;i<N;i++) { 1901 for (j=0;j<N;j++) { 1902 #if !defined(PETSC_USE_COMPLEX) 1903 PetscScalar val = mumps->id.schur[i*N+j]; 1904 #else 1905 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 1906 #endif 1907 array[j*N+i] = val; 1908 } 1909 } 1910 } else { /* stored by columns */ 1911 ierr = PetscMemcpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur*sizeof(PetscScalar));CHKERRQ(ierr); 1912 } 1913 } else { /* either full or lower-triangular (not packed) */ 1914 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 1915 PetscInt i,j,N=mumps->id.size_schur; 1916 for (i=0;i<N;i++) { 1917 for (j=i;j<N;j++) { 1918 #if !defined(PETSC_USE_COMPLEX) 1919 PetscScalar val = mumps->id.schur[i*N+j]; 1920 #else 1921 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 1922 #endif 1923 array[i*N+j] = val; 1924 array[j*N+i] = val; 1925 } 1926 } 1927 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 1928 ierr = PetscMemcpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur*sizeof(PetscScalar));CHKERRQ(ierr); 1929 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 1930 PetscInt i,j,N=mumps->id.size_schur; 1931 for (i=0;i<N;i++) { 1932 for (j=0;j<i+1;j++) { 1933 #if !defined(PETSC_USE_COMPLEX) 1934 PetscScalar val = mumps->id.schur[i*N+j]; 1935 #else 1936 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 1937 #endif 1938 array[i*N+j] = val; 1939 array[j*N+i] = val; 1940 } 1941 } 1942 } 1943 } 1944 ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr); 1945 *S = St; 1946 PetscFunctionReturn(0); 1947 } 1948 1949 /* -------------------------------------------------------------------------------------------*/ 1950 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival) 1951 { 1952 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1953 1954 PetscFunctionBegin; 1955 mumps->id.ICNTL(icntl) = ival; 1956 PetscFunctionReturn(0); 1957 } 1958 1959 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival) 1960 { 1961 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1962 1963 PetscFunctionBegin; 1964 *ival = mumps->id.ICNTL(icntl); 1965 PetscFunctionReturn(0); 1966 } 1967 1968 /*@ 1969 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 1970 1971 Logically Collective on Mat 1972 1973 Input Parameters: 1974 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 1975 . icntl - index of MUMPS parameter array ICNTL() 1976 - ival - value of MUMPS ICNTL(icntl) 1977 1978 Options Database: 1979 . -mat_mumps_icntl_<icntl> <ival> 1980 1981 Level: beginner 1982 1983 References: 1984 .
MUMPS Users' Guide 1985 1986 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 1987 @*/ 1988 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival) 1989 { 1990 PetscErrorCode ierr; 1991 1992 PetscFunctionBegin; 1993 PetscValidType(F,1); 1994 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 1995 PetscValidLogicalCollectiveInt(F,icntl,2); 1996 PetscValidLogicalCollectiveInt(F,ival,3); 1997 ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr); 1998 PetscFunctionReturn(0); 1999 } 2000 2001 /*@ 2002 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2003 2004 Logically Collective on Mat 2005 2006 Input Parameters: 2007 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2008 - icntl - index of MUMPS parameter array ICNTL() 2009 2010 Output Parameter: 2011 . ival - value of MUMPS ICNTL(icntl) 2012 2013 Level: beginner 2014 2015 References: 2016 . MUMPS Users' Guide 2017 2018 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2019 @*/ 2020 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival) 2021 { 2022 PetscErrorCode ierr; 2023 2024 PetscFunctionBegin; 2025 PetscValidType(F,1); 2026 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2027 PetscValidLogicalCollectiveInt(F,icntl,2); 2028 PetscValidIntPointer(ival,3); 2029 ierr = PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2030 PetscFunctionReturn(0); 2031 } 2032 2033 /* -------------------------------------------------------------------------------------------*/ 2034 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val) 2035 { 2036 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2037 2038 PetscFunctionBegin; 2039 mumps->id.CNTL(icntl) = val; 2040 PetscFunctionReturn(0); 2041 } 2042 2043 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val) 2044 { 2045 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2046 2047 PetscFunctionBegin; 2048 *val = mumps->id.CNTL(icntl); 2049 PetscFunctionReturn(0); 2050 } 2051 2052 /*@ 2053 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2054 2055 Logically Collective on Mat 2056 2057 Input Parameters: 2058 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2059 . icntl - index of MUMPS parameter array CNTL() 2060 - val - value of MUMPS CNTL(icntl) 2061 2062 Options Database: 2063 . -mat_mumps_cntl_<icntl> <val> 2064 2065 Level: beginner 2066 2067 References: 2068 . 
MUMPS Users' Guide 2069 2070 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2071 @*/ 2072 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val) 2073 { 2074 PetscErrorCode ierr; 2075 2076 PetscFunctionBegin; 2077 PetscValidType(F,1); 2078 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2079 PetscValidLogicalCollectiveInt(F,icntl,2); 2080 PetscValidLogicalCollectiveReal(F,val,3); 2081 ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr); 2082 PetscFunctionReturn(0); 2083 } 2084 2085 /*@ 2086 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2087 2088 Logically Collective on Mat 2089 2090 Input Parameters: 2091 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2092 - icntl - index of MUMPS parameter array CNTL() 2093 2094 Output Parameter: 2095 . val - value of MUMPS CNTL(icntl) 2096 2097 Level: beginner 2098 2099 References: 2100 . MUMPS Users' Guide 2101 2102 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2103 @*/ 2104 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val) 2105 { 2106 PetscErrorCode ierr; 2107 2108 PetscFunctionBegin; 2109 PetscValidType(F,1); 2110 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2111 PetscValidLogicalCollectiveInt(F,icntl,2); 2112 PetscValidRealPointer(val,3); 2113 ierr = PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2114 PetscFunctionReturn(0); 2115 } 2116 2117 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info) 2118 { 2119 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2120 2121 PetscFunctionBegin; 2122 *info = mumps->id.INFO(icntl); 2123 PetscFunctionReturn(0); 2124 } 2125 2126 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog) 2127 { 2128 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2129 2130 PetscFunctionBegin; 2131 *infog = mumps->id.INFOG(icntl); 2132 PetscFunctionReturn(0); 2133 } 2134 2135 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo) 2136 { 2137 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2138 2139 PetscFunctionBegin; 2140 *rinfo = mumps->id.RINFO(icntl); 2141 PetscFunctionReturn(0); 2142 } 2143 2144 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog) 2145 { 2146 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2147 2148 PetscFunctionBegin; 2149 *rinfog = mumps->id.RINFOG(icntl); 2150 PetscFunctionReturn(0); 2151 } 2152 2153 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS) 2154 { 2155 PetscErrorCode ierr; 2156 Mat Bt = NULL,Btseq = NULL; 2157 PetscBool flg; 2158 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2159 PetscScalar *aa; 2160 PetscInt spnr,*ia,*ja; 2161 2162 PetscFunctionBegin; 2163 PetscValidIntPointer(spRHS,2); 2164 ierr = PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);CHKERRQ(ierr); 2165 if (flg) { 2166 ierr = MatTransposeGetMat(spRHS,&Bt);CHKERRQ(ierr); 2167 } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be of type MATTRANSPOSEMAT"); 2168 2169 ierr = MatMumpsSetIcntl(F,30,1);CHKERRQ(ierr); 2170 2171 if (mumps->petsc_size > 1) { 2172 Mat_MPIAIJ *b =
(Mat_MPIAIJ*)Bt->data; 2173 Btseq = b->A; 2174 } else { 2175 Btseq = Bt; 2176 } 2177 2178 if (!mumps->myid) { 2179 ierr = MatSeqAIJGetArray(Btseq,&aa);CHKERRQ(ierr); 2180 ierr = MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2181 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2182 2183 mumps->id.irhs_ptr = ia; 2184 mumps->id.irhs_sparse = ja; 2185 mumps->id.nz_rhs = ia[spnr] - 1; 2186 mumps->id.rhs_sparse = (MumpsScalar*)aa; 2187 } else { 2188 mumps->id.irhs_ptr = NULL; 2189 mumps->id.irhs_sparse = NULL; 2190 mumps->id.nz_rhs = 0; 2191 mumps->id.rhs_sparse = NULL; 2192 } 2193 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2194 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2195 2196 /* solve phase */ 2197 /*-------------*/ 2198 mumps->id.job = JOB_SOLVE; 2199 PetscMUMPS_c(mumps); 2200 if (mumps->id.INFOG(1) < 0) 2201 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 2202 2203 if (!mumps->myid) { 2204 ierr = MatSeqAIJRestoreArray(Btseq,&aa);CHKERRQ(ierr); 2205 ierr = MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2206 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2207 } 2208 PetscFunctionReturn(0); 2209 } 2210 2211 /*@ 2212 MatMumpsGetInverse - Get user-specified set of entries in inverse of A 2213 2214 Logically Collective on Mat 2215 2216 Input Parameters: 2217 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2218 - spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0] 2219 2220 Output Parameter: 2221 . spRHS - requested entries of inverse of A 2222 2223 Level: beginner 2224 2225 References: 2226 . MUMPS Users' Guide 2227 2228 .seealso: MatGetFactor(), MatCreateTranspose() 2229 @*/ 2230 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS) 2231 { 2232 PetscErrorCode ierr; 2233 2234 PetscFunctionBegin; 2235 PetscValidType(F,1); 2236 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2237 ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr); 2238 PetscFunctionReturn(0); 2239 } 2240 2241 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST) 2242 { 2243 PetscErrorCode ierr; 2244 Mat spRHS; 2245 2246 PetscFunctionBegin; 2247 ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr); 2248 ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr); 2249 ierr = MatDestroy(&spRHS);CHKERRQ(ierr); 2250 PetscFunctionReturn(0); 2251 } 2252 2253 /*@ 2254 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T 2255 2256 Logically Collective on Mat 2257 2258 Input Parameters: 2259 + F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface 2260 - spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0] 2261 2262 Output Parameter: 2263 . spRHST - requested entries of inverse of A^T 2264 2265 Level: beginner 2266 2267 References: 2268 . 
MUMPS Users' Guide 2269 2270 .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse() 2271 @*/ 2272 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST) 2273 { 2274 PetscErrorCode ierr; 2275 PetscBool flg; 2276 2277 PetscFunctionBegin; 2278 PetscValidType(F,1); 2279 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2280 ierr = PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 2281 if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be a MATAIJ matrix"); 2282 2283 ierr = PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));CHKERRQ(ierr); 2284 PetscFunctionReturn(0); 2285 } 2286 2287 /*@ 2288 MatMumpsGetInfo - Get MUMPS parameter INFO() 2289 2290 Logically Collective on Mat 2291 2292 Input Parameters: 2293 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2294 - icntl - index of MUMPS parameter array INFO() 2295 2296 Output Parameter: 2297 . ival - value of MUMPS INFO(icntl) 2298 2299 Level: beginner 2300 2301 References: 2302 . MUMPS Users' Guide 2303 2304 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2305 @*/ 2306 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival) 2307 { 2308 PetscErrorCode ierr; 2309 2310 PetscFunctionBegin; 2311 PetscValidType(F,1); 2312 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2313 PetscValidIntPointer(ival,3); 2314 ierr = PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2315 PetscFunctionReturn(0); 2316 } 2317 2318 /*@ 2319 MatMumpsGetInfog - Get MUMPS parameter INFOG() 2320 2321 Logically Collective on Mat 2322 2323 Input Parameters: 2324 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2325 - icntl - index of MUMPS parameter array INFOG() 2326 2327 Output Parameter: 2328 . ival - value of MUMPS INFOG(icntl) 2329 2330 Level: beginner 2331 2332 References: 2333 . MUMPS Users' Guide 2334 2335 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2336 @*/ 2337 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival) 2338 { 2339 PetscErrorCode ierr; 2340 2341 PetscFunctionBegin; 2342 PetscValidType(F,1); 2343 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2344 PetscValidIntPointer(ival,3); 2345 ierr = PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2346 PetscFunctionReturn(0); 2347 } 2348 2349 /*@ 2350 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 2351 2352 Logically Collective on Mat 2353 2354 Input Parameters: 2355 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2356 - icntl - index of MUMPS parameter array RINFO() 2357 2358 Output Parameter: 2359 . val - value of MUMPS RINFO(icntl) 2360 2361 Level: beginner 2362 2363 References: 2364 .
MUMPS Users' Guide 2365 2366 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2367 @*/ 2368 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val) 2369 { 2370 PetscErrorCode ierr; 2371 2372 PetscFunctionBegin; 2373 PetscValidType(F,1); 2374 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2375 PetscValidRealPointer(val,3); 2376 ierr = PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2377 PetscFunctionReturn(0); 2378 } 2379 2380 /*@ 2381 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 2382 2383 Logically Collective on Mat 2384 2385 Input Parameters: 2386 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2387 - icntl - index of MUMPS parameter array RINFOG() 2388 2389 Output Parameter: 2390 . val - value of MUMPS RINFOG(icntl) 2391 2392 Level: beginner 2393 2394 References: 2395 . MUMPS Users' Guide 2396 2397 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2398 @*/ 2399 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val) 2400 { 2401 PetscErrorCode ierr; 2402 2403 PetscFunctionBegin; 2404 PetscValidType(F,1); 2405 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2406 PetscValidRealPointer(val,3); 2407 ierr = PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2408 PetscFunctionReturn(0); 2409 } 2410 2411 /*MC 2412 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 2413 distributed and sequential matrices via the external package MUMPS. 2414 2415 Works with MATAIJ and MATSBAIJ matrices 2416 2417 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 2418 2419 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below. 2420 2421 Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver 2422 2423 Options Database Keys: 2424 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 2425 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warnings 2426 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 2427 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 2428 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7) 2429 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis 2430 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 2431 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 2432 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 2433 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 2434 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 2435 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 2436 .
-mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 2437 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 2438 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can be allocated per processor 2439 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 2440 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 2441 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix is present 2442 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering 2443 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 2444 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 2445 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 2446 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 2447 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 2448 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 2449 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 2450 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 2451 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 2452 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 2453 m defaults to the number of cores per CPU package (socket) as reported by hwloc, as suggested by the MUMPS manual. 2454 2455 Level: beginner 2456 2457 Notes: 2458 When a MUMPS factorization fails inside a KSP solve, for example with KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling 2459 $ KSPGetPC(ksp,&pc); 2460 $ PCFactorGetMatrix(pc,&mat); 2461 $ MatMumpsGetInfo(mat,....); 2462 $ MatMumpsGetInfog(mat,....); etc. 2463 Or you can run with -ksp_error_if_not_converged and the program will be stopped and the information printed in the error message. 2464 2465 If you want to run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still want to run the non-MUMPS part 2466 (i.e., the PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc 2467 (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 2468 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or the open-source 2469 OpenBLAS (PETSc has configure options to install/specify them). With these conditions met, you can run your program as before but with 2470 an extra option -mat_mumps_use_omp_threads [m]. It works as if we set OMP_NUM_THREADS=m for MUMPS, with m defaulting to the number of cores 2471 per CPU socket (or package, in hwloc terms), or the number of PETSc MPI processes on a node, whichever is smaller. 2472 2473 By flat-MPI or pure-MPI mode, we mean you run your code with as many MPI ranks as the number of cores. For example, 2474 if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test". To run MPI+OpenMP hybrid MUMPS, 2475 the traditional way is to set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 2476 threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". The problem with this approach is that the non-MUMPS 2477 part of your code runs with fewer cores and CPUs are wasted. "-mat_mumps_use_omp_threads [m]" provides an alternative such that 2478 you can still run your code with as many MPI ranks as the number of cores, but have MUMPS run in MPI+OpenMP hybrid mode. In our example, 2479 you can use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16". 2480 2481 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type to get the MPI 2482 processes on each compute node. Listing the processes in rank ascending order, we split the processes on a node into consecutive groups of 2483 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and the others in the omp_comm 2484 are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. We free the CPUs assigned to slaves (which might be set 2485 by CPU binding policies in job scripts) and make them available to the master so that the OMP threads spawned by MUMPS can run on them. 2486 On a multi-socket compute node, MPI rank mapping is an issue. Continuing the above example, suppose your compute node has two sockets. 2487 If you interleave MPI ranks on the two sockets (in other words, even ranks are placed on socket 0 and odd ranks on socket 1) and bind 2488 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16 a master rank (and the threads it spawns) will use half of the cores in socket 0 and half of the 2489 cores in socket 1, which definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, the 2490 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 2491 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 2492 examine the mapping result. 2493 2494 PETSc does not control thread binding in MUMPS. So to get the best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts, 2495 for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc 2496 calls omp_set_num_threads(m) internally before calling MUMPS. 2497 2498 References: 2499 + 1. - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 2500 - 2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
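   Example usage (a minimal sketch; "./app" is a placeholder for your program):
$      mpirun -n 4 ./app -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_14 50 -mat_mumps_icntl_24 1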
2501 2502 .seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCGetFactor(), PCFactorGetMatrix() 2503 2504 M*/ 2505 2506 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type) 2507 { 2508 PetscFunctionBegin; 2509 *type = MATSOLVERMUMPS; 2510 PetscFunctionReturn(0); 2511 } 2512 2513 /* MatGetFactor for Seq and MPI AIJ matrices */ 2514 static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F) 2515 { 2516 Mat B; 2517 PetscErrorCode ierr; 2518 Mat_MUMPS *mumps; 2519 PetscBool isSeqAIJ; 2520 2521 PetscFunctionBegin; 2522 /* Create the factorization matrix */ 2523 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr); 2524 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2525 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2526 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2527 ierr = MatSetUp(B);CHKERRQ(ierr); 2528 2529 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2530 2531 B->ops->view = MatView_MUMPS; 2532 B->ops->getinfo = MatGetInfo_MUMPS; 2533 2534 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2535 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2536 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2537 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2538 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2539 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2540 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2541 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2542 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2543 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2544 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2545 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2546 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2547 2548 if (ftype == MAT_FACTOR_LU) { 2549 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 2550 B->factortype = MAT_FACTOR_LU; 2551 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 2552 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 2553 mumps->sym = 0; 2554 } else { 2555 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 2556 B->factortype = MAT_FACTOR_CHOLESKY; 2557 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 2558 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 2559 #if defined(PETSC_USE_COMPLEX) 2560 mumps->sym = 2; 2561 #else 2562 if (A->spd_set && A->spd) mumps->sym = 1; 2563 else 
    else mumps->sym = 2;
#endif
  }

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}

/* MatGetFactor for Seq and MPI SBAIJ matrices */
static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqSBAIJ;

  PetscFunctionBegin;
  if (ftype != MAT_FACTOR_CHOLESKY) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Cannot use PETSc SBAIJ matrices with MUMPS LU, use AIJ matrix");
  if (A->rmap->bs > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Cannot use PETSc SBAIJ matrices with block size > 1 with MUMPS Cholesky, use AIJ matrix instead");
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr);
  /* Create the factorization matrix */
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
  if (isSeqSBAIJ) {
    mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
  } else {
    mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
  }

  B->ops->getinfo                = MatGetInfo_External;
  B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
  B->ops->view                   = MatView_MUMPS;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  B->factortype = MAT_FACTOR_CHOLESKY;
#if defined(PETSC_USE_COMPLEX)
  mumps->sym = 2;
#else
  if (A->spd_set && A->spd) mumps->sym = 1;
  else mumps->sym = 2;
#endif
  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}

/* MatGetFactor for Seq and MPI BAIJ matrices */
static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqBAIJ;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
    mumps->sym = 0;
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");

  B->ops->getinfo = MatGetInfo_External;
  B->ops->view    = MatView_MUMPS;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}

/* MatGetFactor for Seq and MPI SELL matrices */
static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqSELL;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
    else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
    mumps->sym = 0;
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}

PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
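/*
   Illustration (an editor's sketch, not code from this file): once the registrations above have run, a caller can obtain and
   use a MUMPS LU factorization directly, without going through KSP/PC. The sketch assumes A is an assembled AIJ matrix and
   b, x are conforming vectors; error checking is omitted:

      Mat           F;
      MatFactorInfo info;

      MatFactorInfoInitialize(&info);
      MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);
      MatMumpsSetIcntl(F,4,1);
      MatLUFactorSymbolic(F,A,NULL,NULL,&info);
      MatLUFactorNumeric(F,A,&info);
      MatSolve(F,b,x);
      MatDestroy(&F);

   Here MatMumpsSetIcntl(F,4,1) lowers MUMPS's print level (ICNTL(4)); any other ICNTL/CNTL option can be set the same way
   between MatGetFactor() and the factorization. NULL row/column orderings are passed because MUMPS computes its own ordering;
   if your PETSc version requires index sets there, supply identity ISs instead.
*/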