1 2 /* 3 Provides an interface to the MUMPS sparse solver 4 */ 5 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 8 #include <../src/mat/impls/sell/mpi/mpisell.h> 9 10 EXTERN_C_BEGIN 11 #if defined(PETSC_USE_COMPLEX) 12 #if defined(PETSC_USE_REAL_SINGLE) 13 #include <cmumps_c.h> 14 #else 15 #include <zmumps_c.h> 16 #endif 17 #else 18 #if defined(PETSC_USE_REAL_SINGLE) 19 #include <smumps_c.h> 20 #else 21 #include <dmumps_c.h> 22 #endif 23 #endif 24 EXTERN_C_END 25 #define JOB_INIT -1 26 #define JOB_FACTSYMBOLIC 1 27 #define JOB_FACTNUMERIC 2 28 #define JOB_SOLVE 3 29 #define JOB_END -2 30 31 /* calls to MUMPS */ 32 #if defined(PETSC_USE_COMPLEX) 33 #if defined(PETSC_USE_REAL_SINGLE) 34 #define MUMPS_c cmumps_c 35 #else 36 #define MUMPS_c zmumps_c 37 #endif 38 #else 39 #if defined(PETSC_USE_REAL_SINGLE) 40 #define MUMPS_c smumps_c 41 #else 42 #define MUMPS_c dmumps_c 43 #endif 44 #endif 45 46 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */ 47 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 48 #define PetscMUMPS_c(mumps) \ 49 do { \ 50 if (mumps->use_petsc_omp_support) { \ 51 if (mumps->is_omp_master) { \ 52 ierr = PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl);CHKERRQ(ierr); \ 53 MUMPS_c(&mumps->id); \ 54 ierr = PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl);CHKERRQ(ierr); \ 55 } \ 56 ierr = PetscOmpCtrlBarrier(mumps->omp_ctrl);CHKERRQ(ierr); \ 57 } else { \ 58 MUMPS_c(&mumps->id); \ 59 } \ 60 } while(0) 61 #else 62 #define PetscMUMPS_c(mumps) \ 63 do { MUMPS_c(&mumps->id); } while (0) 64 #endif 65 66 /* declare MumpsScalar */ 67 #if defined(PETSC_USE_COMPLEX) 68 #if defined(PETSC_USE_REAL_SINGLE) 69 #define MumpsScalar mumps_complex 70 #else 71 #define MumpsScalar mumps_double_complex 72 #endif 73 #else 74 #define MumpsScalar PetscScalar 75 #endif 76 77 /* macros s.t. indices match MUMPS documentation */ 78 #define ICNTL(I) icntl[(I)-1] 79 #define CNTL(I) cntl[(I)-1] 80 #define INFOG(I) infog[(I)-1] 81 #define INFO(I) info[(I)-1] 82 #define RINFOG(I) rinfog[(I)-1] 83 #define RINFO(I) rinfo[(I)-1] 84 85 typedef struct { 86 #if defined(PETSC_USE_COMPLEX) 87 #if defined(PETSC_USE_REAL_SINGLE) 88 CMUMPS_STRUC_C id; 89 #else 90 ZMUMPS_STRUC_C id; 91 #endif 92 #else 93 #if defined(PETSC_USE_REAL_SINGLE) 94 SMUMPS_STRUC_C id; 95 #else 96 DMUMPS_STRUC_C id; 97 #endif 98 #endif 99 100 MatStructure matstruc; 101 PetscMPIInt myid,petsc_size; 102 PetscInt *irn,*jcn,nz,sym; 103 PetscScalar *val; 104 MPI_Comm mumps_comm; 105 PetscInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 106 VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 107 Vec b_seq,x_seq; 108 PetscInt ninfo,*info; /* display INFO */ 109 PetscInt sizeredrhs; 110 PetscScalar *schur_sol; 111 PetscInt schur_sizesol; 112 113 PetscBool use_petsc_omp_support; 114 PetscOmpCtrl omp_ctrl; /* an OpenMP controler that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 115 MPI_Comm petsc_comm,omp_comm; /* petsc_comm is petsc matrix's comm */ 116 PetscMPIInt mpinz; /* on master rank, nz = sum(mpinz) over omp_comm; on other ranks, mpinz = nz*/ 117 PetscMPIInt omp_comm_size; 118 PetscBool is_omp_master; /* is this rank the master of omp_comm */ 119 PetscMPIInt *recvcount,*displs; 120 121 PetscErrorCode (*ConvertToTriples)(Mat, int, MatReuse, int*, int**, int**, PetscScalar**); 122 } Mat_MUMPS; 123 124 extern PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*); 125 126 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps) 127 { 128 PetscErrorCode ierr; 129 130 PetscFunctionBegin; 131 ierr = PetscFree2(mumps->id.listvar_schur,mumps->id.schur);CHKERRQ(ierr); 132 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 133 ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr); 134 mumps->id.size_schur = 0; 135 mumps->id.schur_lld = 0; 136 mumps->id.ICNTL(19) = 0; 137 PetscFunctionReturn(0); 138 } 139 140 /* solve with rhs in mumps->id.redrhs and return in the same location */ 141 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 142 { 143 Mat_MUMPS *mumps=(Mat_MUMPS*)F->data; 144 Mat S,B,X; 145 MatFactorSchurStatus schurstatus; 146 PetscInt sizesol; 147 PetscErrorCode ierr; 148 149 PetscFunctionBegin; 150 ierr = MatFactorFactorizeSchurComplement(F);CHKERRQ(ierr); 151 ierr = MatFactorGetSchurComplement(F,&S,&schurstatus);CHKERRQ(ierr); 152 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);CHKERRQ(ierr); 153 switch (schurstatus) { 154 case MAT_FACTOR_SCHUR_FACTORED: 155 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);CHKERRQ(ierr); 156 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 157 ierr = MatMatSolveTranspose(S,B,X);CHKERRQ(ierr); 158 } else { 159 ierr = MatMatSolve(S,B,X);CHKERRQ(ierr); 160 } 161 break; 162 case MAT_FACTOR_SCHUR_INVERTED: 163 sizesol = mumps->id.nrhs*mumps->id.size_schur; 164 if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 165 ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr); 166 ierr = PetscMalloc1(sizesol,&mumps->schur_sol);CHKERRQ(ierr); 167 mumps->schur_sizesol = sizesol; 168 } 169 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);CHKERRQ(ierr); 170 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 171 ierr = MatTransposeMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr); 172 } else { 173 ierr = MatMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr); 174 } 175 ierr = MatCopy(X,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 176 break; 177 default: 178 SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %D",F->schur_status); 179 break; 180 } 181 ierr = MatFactorRestoreSchurComplement(F,&S,schurstatus);CHKERRQ(ierr); 182 ierr = MatDestroy(&B);CHKERRQ(ierr); 183 ierr = MatDestroy(&X);CHKERRQ(ierr); 184 PetscFunctionReturn(0); 185 } 186 187 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 188 { 189 Mat_MUMPS *mumps=(Mat_MUMPS*)F->data; 190 PetscErrorCode ierr; 191 192 PetscFunctionBegin; 193 if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 194 PetscFunctionReturn(0); 195 } 196 if (!expansion) { /* prepare for the condensation step */ 197 PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur; 198 /* allocate MUMPS internal array to store reduced right-hand sides */ 199 if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 200 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 201 mumps->id.lredrhs = mumps->id.size_schur; 202 ierr = PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);CHKERRQ(ierr); 203 mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs; 204 } 205 mumps->id.ICNTL(26) = 1; /* condensation phase */ 206 } else { /* prepare for the expansion step */ 207 /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */ 208 ierr = MatMumpsSolveSchur_Private(F);CHKERRQ(ierr); 209 mumps->id.ICNTL(26) = 2; /* expansion phase */ 210 PetscMUMPS_c(mumps); 211 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 212 /* restore defaults */ 213 mumps->id.ICNTL(26) = -1; 214 /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 215 if (mumps->id.nrhs > 1) { 216 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 217 mumps->id.lredrhs = 0; 218 mumps->sizeredrhs = 0; 219 } 220 } 221 PetscFunctionReturn(0); 222 } 223 224 /* 225 MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz] 226 227 input: 228 A - matrix in aij,baij or sbaij (bs=1) format 229 shift - 0: C style output triple; 1: Fortran style output triple. 230 reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 231 MAT_REUSE_MATRIX: only the values in v array are updated 232 output: 233 nnz - dim of r, c, and v (number of local nonzero entries of A) 234 r, c, v - row and col index, matrix values (matrix triples) 235 236 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 237 freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 238 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 239 240 */ 241 242 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 243 { 244 const PetscInt *ai,*aj,*ajj,M=A->rmap->n; 245 PetscInt nz,rnz,i,j; 246 PetscErrorCode ierr; 247 PetscInt *row,*col; 248 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 249 250 PetscFunctionBegin; 251 *v=aa->a; 252 if (reuse == MAT_INITIAL_MATRIX) { 253 nz = aa->nz; 254 ai = aa->i; 255 aj = aa->j; 256 *nnz = nz; 257 ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr); 258 col = row + nz; 259 260 nz = 0; 261 for (i=0; i<M; i++) { 262 rnz = ai[i+1] - ai[i]; 263 ajj = aj + ai[i]; 264 for (j=0; j<rnz; j++) { 265 row[nz] = i+shift; col[nz++] = ajj[j] + shift; 266 } 267 } 268 *r = row; *c = col; 269 } 270 PetscFunctionReturn(0); 271 } 272 273 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 274 { 275 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 276 PetscInt *ptr; 277 278 PetscFunctionBegin; 279 *v = a->val; 280 if (reuse == MAT_INITIAL_MATRIX) { 281 PetscInt nz,i,j,row; 282 PetscErrorCode ierr; 283 284 nz = a->sliidx[a->totalslices]; 285 *nnz = nz; 286 ierr = PetscMalloc1(2*nz, &ptr);CHKERRQ(ierr); 287 *r = ptr; 288 *c = ptr + nz; 289 290 for (i=0; i<a->totalslices; i++) { 291 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 292 *ptr++ = 8*i + row + shift; 293 } 294 } 295 for (i=0;i<nz;i++) *ptr++ = a->colidx[i] + shift; 296 } 297 PetscFunctionReturn(0); 298 } 299 300 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 301 { 302 Mat_SeqBAIJ *aa=(Mat_SeqBAIJ*)A->data; 303 const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2; 304 PetscInt bs,M,nz,idx=0,rnz,i,j,k,m; 305 PetscErrorCode ierr; 306 PetscInt *row,*col; 307 308 PetscFunctionBegin; 309 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 310 M = A->rmap->N/bs; 311 *v = aa->a; 312 if (reuse == MAT_INITIAL_MATRIX) { 313 ai = aa->i; aj = aa->j; 314 nz = bs2*aa->nz; 315 *nnz = nz; 316 ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr); 317 col = row + nz; 318 319 for (i=0; i<M; i++) { 320 ajj = aj + ai[i]; 321 rnz = ai[i+1] - ai[i]; 322 for (k=0; k<rnz; k++) { 323 for (j=0; j<bs; j++) { 324 for (m=0; m<bs; m++) { 325 row[idx] = i*bs + m + shift; 326 col[idx++] = bs*(ajj[k]) + j + shift; 327 } 328 } 329 } 330 } 331 *r = row; *c = col; 332 } 333 PetscFunctionReturn(0); 334 } 335 336 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 337 { 338 const PetscInt *ai, *aj,*ajj,M=A->rmap->n; 339 PetscInt nz,rnz,i,j; 340 PetscErrorCode ierr; 341 PetscInt *row,*col; 342 Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)A->data; 343 344 PetscFunctionBegin; 345 *v = aa->a; 346 if (reuse == MAT_INITIAL_MATRIX) { 347 nz = aa->nz; 348 ai = aa->i; 349 aj = aa->j; 350 *v = aa->a; 351 *nnz = nz; 352 ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr); 353 col = row + nz; 354 355 nz = 0; 356 for (i=0; i<M; i++) { 357 rnz = ai[i+1] - ai[i]; 358 ajj = aj + ai[i]; 359 for (j=0; j<rnz; j++) { 360 row[nz] = i+shift; col[nz++] = ajj[j] + shift; 361 } 362 } 363 *r = row; *c = col; 364 } 365 PetscFunctionReturn(0); 366 } 367 368 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 369 { 370 const PetscInt *ai,*aj,*ajj,*adiag,M=A->rmap->n; 371 PetscInt nz,rnz,i,j; 372 const PetscScalar *av,*v1; 373 PetscScalar *val; 374 PetscErrorCode ierr; 375 PetscInt *row,*col; 376 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 377 PetscBool missing; 378 379 PetscFunctionBegin; 380 ai = aa->i; aj = aa->j; av = aa->a; 381 adiag = aa->diag; 382 ierr = MatMissingDiagonal_SeqAIJ(A,&missing,&i);CHKERRQ(ierr); 383 if (reuse == MAT_INITIAL_MATRIX) { 384 /* count nz in the upper triangular part of A */ 385 nz = 0; 386 if (missing) { 387 for (i=0; i<M; i++) { 388 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 389 for (j=ai[i];j<ai[i+1];j++) { 390 if (aj[j] < i) continue; 391 nz++; 392 } 393 } else { 394 nz += ai[i+1] - adiag[i]; 395 } 396 } 397 } else { 398 for (i=0; i<M; i++) nz += ai[i+1] - adiag[i]; 399 } 400 *nnz = nz; 401 402 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 403 col = row + nz; 404 val = (PetscScalar*)(col + nz); 405 406 nz = 0; 407 if (missing) { 408 for (i=0; i<M; i++) { 409 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 410 for (j=ai[i];j<ai[i+1];j++) { 411 if (aj[j] < i) continue; 412 row[nz] = i+shift; 413 col[nz] = aj[j]+shift; 414 val[nz] = av[j]; 415 nz++; 416 } 417 } else { 418 rnz = ai[i+1] - adiag[i]; 419 ajj = aj + adiag[i]; 420 v1 = av + adiag[i]; 421 for (j=0; j<rnz; j++) { 422 row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j]; 423 } 424 } 425 } 426 } else { 427 for (i=0; i<M; i++) { 428 rnz = ai[i+1] - adiag[i]; 429 ajj = aj + adiag[i]; 430 v1 = av + adiag[i]; 431 for (j=0; j<rnz; j++) { 432 row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j]; 433 } 434 } 435 } 436 *r = row; *c = col; *v = val; 437 } else { 438 nz = 0; val = *v; 439 if (missing) { 440 for (i=0; i <M; i++) { 441 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 442 for (j=ai[i];j<ai[i+1];j++) { 443 if (aj[j] < i) continue; 444 val[nz++] = av[j]; 445 } 446 } else { 447 rnz = ai[i+1] - adiag[i]; 448 v1 = av + adiag[i]; 449 for (j=0; j<rnz; j++) { 450 val[nz++] = v1[j]; 451 } 452 } 453 } 454 } else { 455 for (i=0; i <M; i++) { 456 rnz = ai[i+1] - adiag[i]; 457 v1 = av + adiag[i]; 458 for (j=0; j<rnz; j++) { 459 val[nz++] = v1[j]; 460 } 461 } 462 } 463 } 464 PetscFunctionReturn(0); 465 } 466 467 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 468 { 469 const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 470 PetscErrorCode ierr; 471 PetscInt rstart,nz,i,j,jj,irow,countA,countB; 472 PetscInt *row,*col; 473 const PetscScalar *av, *bv,*v1,*v2; 474 PetscScalar *val; 475 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data; 476 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)(mat->A)->data; 477 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 478 479 PetscFunctionBegin; 480 ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap->rstart; 481 av=aa->a; bv=bb->a; 482 483 garray = mat->garray; 484 485 if (reuse == MAT_INITIAL_MATRIX) { 486 nz = aa->nz + bb->nz; 487 *nnz = nz; 488 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 489 col = row + nz; 490 val = (PetscScalar*)(col + nz); 491 492 *r = row; *c = col; *v = val; 493 } else { 494 row = *r; col = *c; val = *v; 495 } 496 497 jj = 0; irow = rstart; 498 for (i=0; i<m; i++) { 499 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 500 countA = ai[i+1] - ai[i]; 501 countB = bi[i+1] - bi[i]; 502 bjj = bj + bi[i]; 503 v1 = av + ai[i]; 504 v2 = bv + bi[i]; 505 506 /* A-part */ 507 for (j=0; j<countA; j++) { 508 if (reuse == MAT_INITIAL_MATRIX) { 509 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 510 } 511 val[jj++] = v1[j]; 512 } 513 514 /* B-part */ 515 for (j=0; j < countB; j++) { 516 if (reuse == MAT_INITIAL_MATRIX) { 517 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 518 } 519 val[jj++] = v2[j]; 520 } 521 irow++; 522 } 523 PetscFunctionReturn(0); 524 } 525 526 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 527 { 528 const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 529 PetscErrorCode ierr; 530 PetscInt rstart,nz,i,j,jj,irow,countA,countB; 531 PetscInt *row,*col; 532 const PetscScalar *av, *bv,*v1,*v2; 533 PetscScalar *val; 534 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 535 Mat_SeqAIJ *aa = (Mat_SeqAIJ*)(mat->A)->data; 536 Mat_SeqAIJ *bb = (Mat_SeqAIJ*)(mat->B)->data; 537 538 PetscFunctionBegin; 539 ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap->rstart; 540 av=aa->a; bv=bb->a; 541 542 garray = mat->garray; 543 544 if (reuse == MAT_INITIAL_MATRIX) { 545 nz = aa->nz + bb->nz; 546 *nnz = nz; 547 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 548 col = row + nz; 549 val = (PetscScalar*)(col + nz); 550 551 *r = row; *c = col; *v = val; 552 } else { 553 row = *r; col = *c; val = *v; 554 } 555 556 jj = 0; irow = rstart; 557 for (i=0; i<m; i++) { 558 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 559 countA = ai[i+1] - ai[i]; 560 countB = bi[i+1] - bi[i]; 561 bjj = bj + bi[i]; 562 v1 = av + ai[i]; 563 v2 = bv + bi[i]; 564 565 /* A-part */ 566 for (j=0; j<countA; j++) { 567 if (reuse == MAT_INITIAL_MATRIX) { 568 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 569 } 570 val[jj++] = v1[j]; 571 } 572 573 /* B-part */ 574 for (j=0; j < countB; j++) { 575 if (reuse == MAT_INITIAL_MATRIX) { 576 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 577 } 578 val[jj++] = v2[j]; 579 } 580 irow++; 581 } 582 PetscFunctionReturn(0); 583 } 584 585 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 586 { 587 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)A->data; 588 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ*)(mat->A)->data; 589 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 590 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj; 591 const PetscInt *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart; 592 const PetscInt bs2=mat->bs2; 593 PetscErrorCode ierr; 594 PetscInt bs,nz,i,j,k,n,jj,irow,countA,countB,idx; 595 PetscInt *row,*col; 596 const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2; 597 PetscScalar *val; 598 599 PetscFunctionBegin; 600 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 601 if (reuse == MAT_INITIAL_MATRIX) { 602 nz = bs2*(aa->nz + bb->nz); 603 *nnz = nz; 604 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 605 col = row + nz; 606 val = (PetscScalar*)(col + nz); 607 608 *r = row; *c = col; *v = val; 609 } else { 610 row = *r; col = *c; val = *v; 611 } 612 613 jj = 0; irow = rstart; 614 for (i=0; i<mbs; i++) { 615 countA = ai[i+1] - ai[i]; 616 countB = bi[i+1] - bi[i]; 617 ajj = aj + ai[i]; 618 bjj = bj + bi[i]; 619 v1 = av + bs2*ai[i]; 620 v2 = bv + bs2*bi[i]; 621 622 idx = 0; 623 /* A-part */ 624 for (k=0; k<countA; k++) { 625 for (j=0; j<bs; j++) { 626 for (n=0; n<bs; n++) { 627 if (reuse == MAT_INITIAL_MATRIX) { 628 row[jj] = irow + n + shift; 629 col[jj] = rstart + bs*ajj[k] + j + shift; 630 } 631 val[jj++] = v1[idx++]; 632 } 633 } 634 } 635 636 idx = 0; 637 /* B-part */ 638 for (k=0; k<countB; k++) { 639 for (j=0; j<bs; j++) { 640 for (n=0; n<bs; n++) { 641 if (reuse == MAT_INITIAL_MATRIX) { 642 row[jj] = irow + n + shift; 643 col[jj] = bs*garray[bjj[k]] + j + shift; 644 } 645 val[jj++] = v2[idx++]; 646 } 647 } 648 } 649 irow += bs; 650 } 651 PetscFunctionReturn(0); 652 } 653 654 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 655 { 656 const PetscInt *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 657 PetscErrorCode ierr; 658 PetscInt rstart,nz,nza,nzb,i,j,jj,irow,countA,countB; 659 PetscInt *row,*col; 660 const PetscScalar *av, *bv,*v1,*v2; 661 PetscScalar *val; 662 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 663 Mat_SeqAIJ *aa =(Mat_SeqAIJ*)(mat->A)->data; 664 Mat_SeqAIJ *bb =(Mat_SeqAIJ*)(mat->B)->data; 665 666 PetscFunctionBegin; 667 ai=aa->i; aj=aa->j; adiag=aa->diag; 668 bi=bb->i; bj=bb->j; garray = mat->garray; 669 av=aa->a; bv=bb->a; 670 671 rstart = A->rmap->rstart; 672 673 if (reuse == MAT_INITIAL_MATRIX) { 674 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 675 nzb = 0; /* num of upper triangular entries in mat->B */ 676 for (i=0; i<m; i++) { 677 nza += (ai[i+1] - adiag[i]); 678 countB = bi[i+1] - bi[i]; 679 bjj = bj + bi[i]; 680 for (j=0; j<countB; j++) { 681 if (garray[bjj[j]] > rstart) nzb++; 682 } 683 } 684 685 nz = nza + nzb; /* total nz of upper triangular part of mat */ 686 *nnz = nz; 687 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 688 col = row + nz; 689 val = (PetscScalar*)(col + nz); 690 691 *r = row; *c = col; *v = val; 692 } else { 693 row = *r; col = *c; val = *v; 694 } 695 696 jj = 0; irow = rstart; 697 for (i=0; i<m; i++) { 698 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 699 v1 = av + adiag[i]; 700 countA = ai[i+1] - adiag[i]; 701 countB = bi[i+1] - bi[i]; 702 bjj = bj + bi[i]; 703 v2 = bv + bi[i]; 704 705 /* A-part */ 706 for (j=0; j<countA; j++) { 707 if (reuse == MAT_INITIAL_MATRIX) { 708 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 709 } 710 val[jj++] = v1[j]; 711 } 712 713 /* B-part */ 714 for (j=0; j < countB; j++) { 715 if (garray[bjj[j]] > rstart) { 716 if (reuse == MAT_INITIAL_MATRIX) { 717 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 718 } 719 val[jj++] = v2[j]; 720 } 721 } 722 irow++; 723 } 724 PetscFunctionReturn(0); 725 } 726 727 PetscErrorCode MatDestroy_MUMPS(Mat A) 728 { 729 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 730 PetscErrorCode ierr; 731 732 PetscFunctionBegin; 733 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 734 ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr); 735 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 736 ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr); 737 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 738 ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr); 739 ierr = PetscFree(mumps->irn);CHKERRQ(ierr); 740 ierr = PetscFree(mumps->info);CHKERRQ(ierr); 741 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 742 mumps->id.job = JOB_END; 743 PetscMUMPS_c(mumps); 744 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 745 if (mumps->use_petsc_omp_support) { ierr = PetscOmpCtrlDestroy(&mumps->omp_ctrl);CHKERRQ(ierr); } 746 #endif 747 ierr = PetscFree2(mumps->recvcount,mumps->displs);CHKERRQ(ierr); 748 ierr = PetscFree(A->data);CHKERRQ(ierr); 749 750 /* clear composed functions */ 751 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr); 752 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr); 753 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr); 754 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr); 755 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);CHKERRQ(ierr); 756 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr); 757 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);CHKERRQ(ierr); 758 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);CHKERRQ(ierr); 759 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);CHKERRQ(ierr); 760 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);CHKERRQ(ierr); 761 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);CHKERRQ(ierr); 762 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);CHKERRQ(ierr); 763 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);CHKERRQ(ierr); 764 PetscFunctionReturn(0); 765 } 766 767 PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x) 768 { 769 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 770 PetscScalar *array; 771 Vec b_seq; 772 IS is_iden,is_petsc; 773 PetscErrorCode ierr; 774 PetscInt i; 775 PetscBool second_solve = PETSC_FALSE; 776 static PetscBool cite1 = PETSC_FALSE,cite2 = PETSC_FALSE; 777 778 PetscFunctionBegin; 779 ierr = PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1);CHKERRQ(ierr); 780 ierr = PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",&cite2);CHKERRQ(ierr); 781 782 if (A->factorerrortype) { 783 ierr = PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 784 ierr = VecSetInf(x);CHKERRQ(ierr); 785 PetscFunctionReturn(0); 786 } 787 788 mumps->id.ICNTL(20)= 0; /* dense RHS */ 789 mumps->id.nrhs = 1; 790 b_seq = mumps->b_seq; 791 if (mumps->petsc_size > 1) { 792 /* MUMPS only supports centralized rhs. Scatter b into a seqential rhs vector */ 793 ierr = VecScatterBegin(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 794 ierr = VecScatterEnd(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 795 if (!mumps->myid) {ierr = VecGetArray(b_seq,&array);CHKERRQ(ierr);} 796 } else { /* petsc_size == 1 */ 797 ierr = VecCopy(b,x);CHKERRQ(ierr); 798 ierr = VecGetArray(x,&array);CHKERRQ(ierr); 799 } 800 if (!mumps->myid) { /* define rhs on the host */ 801 mumps->id.nrhs = 1; 802 mumps->id.rhs = (MumpsScalar*)array; 803 } 804 805 /* 806 handle condensation step of Schur complement (if any) 807 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 808 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 809 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 810 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 811 */ 812 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 813 if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n"); 814 second_solve = PETSC_TRUE; 815 ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr); 816 } 817 /* solve phase */ 818 /*-------------*/ 819 mumps->id.job = JOB_SOLVE; 820 PetscMUMPS_c(mumps); 821 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 822 823 /* handle expansion step of Schur complement (if any) */ 824 if (second_solve) { 825 ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr); 826 } 827 828 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 829 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 830 /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 831 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 832 } 833 if (!mumps->scat_sol) { /* create scatter scat_sol */ 834 ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */ 835 for (i=0; i<mumps->id.lsol_loc; i++) { 836 mumps->id.isol_loc[i] -= 1; /* change Fortran style to C style */ 837 } 838 ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,mumps->id.isol_loc,PETSC_COPY_VALUES,&is_petsc);CHKERRQ(ierr); /* to */ 839 ierr = VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr); 840 ierr = ISDestroy(&is_iden);CHKERRQ(ierr); 841 ierr = ISDestroy(&is_petsc);CHKERRQ(ierr); 842 843 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 844 } 845 846 ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 847 ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 848 } 849 850 if (mumps->petsc_size > 1) {if (!mumps->myid) {ierr = VecRestoreArray(b_seq,&array);CHKERRQ(ierr);}} 851 else {ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);} 852 853 ierr = PetscLogFlops(2.0*mumps->id.RINFO(3));CHKERRQ(ierr); 854 PetscFunctionReturn(0); 855 } 856 857 PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x) 858 { 859 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 860 PetscErrorCode ierr; 861 862 PetscFunctionBegin; 863 mumps->id.ICNTL(9) = 0; 864 ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr); 865 mumps->id.ICNTL(9) = 1; 866 PetscFunctionReturn(0); 867 } 868 869 PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X) 870 { 871 PetscErrorCode ierr; 872 Mat Bt = NULL; 873 PetscBool flg, flgT; 874 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 875 PetscInt i,nrhs,M; 876 PetscScalar *array; 877 const PetscScalar *rbray; 878 PetscInt lsol_loc,nlsol_loc,*isol_loc,*idxx,*isol_loc_save,iidx = 0; 879 PetscScalar *bray,*sol_loc,*sol_loc_save; 880 IS is_to,is_from; 881 PetscInt k,proc,j,m,myrstart; 882 const PetscInt *rstart; 883 Vec v_mpi,b_seq,msol_loc; 884 VecScatter scat_rhs,scat_sol; 885 PetscScalar *aa; 886 PetscInt spnr,*ia,*ja; 887 Mat_MPIAIJ *b = NULL; 888 889 PetscFunctionBegin; 890 ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 891 if (!flg) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix"); 892 893 ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 894 if (flg) { /* dense B */ 895 if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution"); 896 mumps->id.ICNTL(20)= 0; /* dense RHS */ 897 } else { /* sparse B */ 898 ierr = PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);CHKERRQ(ierr); 899 if (flgT) { /* input B is transpose of actural RHS matrix, 900 because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */ 901 ierr = MatTransposeGetMat(B,&Bt);CHKERRQ(ierr); 902 } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix"); 903 mumps->id.ICNTL(20)= 1; /* sparse RHS */ 904 } 905 906 ierr = MatGetSize(B,&M,&nrhs);CHKERRQ(ierr); 907 mumps->id.nrhs = nrhs; 908 mumps->id.lrhs = M; 909 mumps->id.rhs = NULL; 910 911 if (mumps->petsc_size == 1) { 912 PetscScalar *aa; 913 PetscInt spnr,*ia,*ja; 914 PetscBool second_solve = PETSC_FALSE; 915 916 ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr); 917 mumps->id.rhs = (MumpsScalar*)array; 918 919 if (!Bt) { /* dense B */ 920 /* copy B to X */ 921 ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr); 922 ierr = PetscArraycpy(array,rbray,M*nrhs);CHKERRQ(ierr); 923 ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr); 924 } else { /* sparse B */ 925 ierr = MatSeqAIJGetArray(Bt,&aa);CHKERRQ(ierr); 926 ierr = MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 927 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 928 /* mumps requires ia and ja start at 1! */ 929 mumps->id.irhs_ptr = ia; 930 mumps->id.irhs_sparse = ja; 931 mumps->id.nz_rhs = ia[spnr] - 1; 932 mumps->id.rhs_sparse = (MumpsScalar*)aa; 933 } 934 /* handle condensation step of Schur complement (if any) */ 935 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 936 second_solve = PETSC_TRUE; 937 ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr); 938 } 939 /* solve phase */ 940 /*-------------*/ 941 mumps->id.job = JOB_SOLVE; 942 PetscMUMPS_c(mumps); 943 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 944 945 /* handle expansion step of Schur complement (if any) */ 946 if (second_solve) { 947 ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr); 948 } 949 if (Bt) { /* sparse B */ 950 ierr = MatSeqAIJRestoreArray(Bt,&aa);CHKERRQ(ierr); 951 ierr = MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 952 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 953 } 954 ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr); 955 PetscFunctionReturn(0); 956 } 957 958 /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/ 959 if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n"); 960 961 /* create msol_loc to hold mumps local solution */ 962 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 963 sol_loc_save = (PetscScalar*)mumps->id.sol_loc; 964 965 lsol_loc = mumps->id.lsol_loc; 966 nlsol_loc = nrhs*lsol_loc; /* length of sol_loc */ 967 ierr = PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);CHKERRQ(ierr); 968 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 969 mumps->id.isol_loc = isol_loc; 970 971 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);CHKERRQ(ierr); 972 973 if (!Bt) { /* dense B */ 974 /* scatter v_mpi to b_seq because MUMPS only supports centralized rhs */ 975 /* wrap dense rhs matrix B into a vector v_mpi */ 976 ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr); 977 ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr); 978 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr); 979 ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr); 980 981 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */ 982 if (!mumps->myid) { 983 PetscInt *idx; 984 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 985 ierr = PetscMalloc1(nrhs*M,&idx);CHKERRQ(ierr); 986 ierr = MatGetOwnershipRanges(B,&rstart);CHKERRQ(ierr); 987 k = 0; 988 for (proc=0; proc<mumps->petsc_size; proc++){ 989 for (j=0; j<nrhs; j++){ 990 for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i; 991 } 992 } 993 994 ierr = VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);CHKERRQ(ierr); 995 ierr = ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr); 996 ierr = ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);CHKERRQ(ierr); 997 } else { 998 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);CHKERRQ(ierr); 999 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);CHKERRQ(ierr); 1000 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);CHKERRQ(ierr); 1001 } 1002 ierr = VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);CHKERRQ(ierr); 1003 ierr = VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1004 ierr = ISDestroy(&is_to);CHKERRQ(ierr); 1005 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1006 ierr = VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1007 1008 if (!mumps->myid) { /* define rhs on the host */ 1009 ierr = VecGetArray(b_seq,&bray);CHKERRQ(ierr); 1010 mumps->id.rhs = (MumpsScalar*)bray; 1011 ierr = VecRestoreArray(b_seq,&bray);CHKERRQ(ierr); 1012 } 1013 1014 } else { /* sparse B */ 1015 b = (Mat_MPIAIJ*)Bt->data; 1016 1017 /* wrap dense X into a vector v_mpi */ 1018 ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr); 1019 ierr = MatDenseGetArray(X,&bray);CHKERRQ(ierr); 1020 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr); 1021 ierr = MatDenseRestoreArray(X,&bray);CHKERRQ(ierr); 1022 1023 if (!mumps->myid) { 1024 ierr = MatSeqAIJGetArray(b->A,&aa);CHKERRQ(ierr); 1025 ierr = MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1026 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 1027 /* mumps requires ia and ja start at 1! */ 1028 mumps->id.irhs_ptr = ia; 1029 mumps->id.irhs_sparse = ja; 1030 mumps->id.nz_rhs = ia[spnr] - 1; 1031 mumps->id.rhs_sparse = (MumpsScalar*)aa; 1032 } else { 1033 mumps->id.irhs_ptr = NULL; 1034 mumps->id.irhs_sparse = NULL; 1035 mumps->id.nz_rhs = 0; 1036 mumps->id.rhs_sparse = NULL; 1037 } 1038 } 1039 1040 /* solve phase */ 1041 /*-------------*/ 1042 mumps->id.job = JOB_SOLVE; 1043 PetscMUMPS_c(mumps); 1044 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1045 1046 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1047 ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr); 1048 ierr = VecPlaceArray(v_mpi,array);CHKERRQ(ierr); 1049 1050 /* create scatter scat_sol */ 1051 ierr = MatGetOwnershipRanges(X,&rstart);CHKERRQ(ierr); 1052 /* iidx: index for scatter mumps solution to petsc X */ 1053 1054 ierr = ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);CHKERRQ(ierr); 1055 ierr = PetscMalloc1(nlsol_loc,&idxx);CHKERRQ(ierr); 1056 for (i=0; i<lsol_loc; i++) { 1057 isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1058 1059 for (proc=0; proc<mumps->petsc_size; proc++){ 1060 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) { 1061 myrstart = rstart[proc]; 1062 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1063 iidx = k + myrstart*nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1064 m = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */ 1065 break; 1066 } 1067 } 1068 1069 for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m; 1070 } 1071 ierr = ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr); 1072 ierr = VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);CHKERRQ(ierr); 1073 ierr = VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1074 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1075 ierr = ISDestroy(&is_to);CHKERRQ(ierr); 1076 ierr = VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1077 ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr); 1078 1079 /* free spaces */ 1080 mumps->id.sol_loc = (MumpsScalar*)sol_loc_save; 1081 mumps->id.isol_loc = isol_loc_save; 1082 1083 ierr = PetscFree2(sol_loc,isol_loc);CHKERRQ(ierr); 1084 ierr = PetscFree(idxx);CHKERRQ(ierr); 1085 ierr = VecDestroy(&msol_loc);CHKERRQ(ierr); 1086 ierr = VecDestroy(&v_mpi);CHKERRQ(ierr); 1087 if (Bt) { 1088 if (!mumps->myid) { 1089 b = (Mat_MPIAIJ*)Bt->data; 1090 ierr = MatSeqAIJRestoreArray(b->A,&aa);CHKERRQ(ierr); 1091 ierr = MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1092 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 1093 } 1094 } else { 1095 ierr = VecDestroy(&b_seq);CHKERRQ(ierr); 1096 ierr = VecScatterDestroy(&scat_rhs);CHKERRQ(ierr); 1097 } 1098 ierr = VecScatterDestroy(&scat_sol);CHKERRQ(ierr); 1099 ierr = PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));CHKERRQ(ierr); 1100 PetscFunctionReturn(0); 1101 } 1102 1103 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X) 1104 { 1105 PetscErrorCode ierr; 1106 PetscBool flg; 1107 Mat B; 1108 1109 PetscFunctionBegin; 1110 ierr = PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 1111 if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix"); 1112 1113 /* Create B=Bt^T that uses Bt's data structure */ 1114 ierr = MatCreateTranspose(Bt,&B);CHKERRQ(ierr); 1115 1116 ierr = MatMatSolve_MUMPS(A,B,X);CHKERRQ(ierr); 1117 ierr = MatDestroy(&B);CHKERRQ(ierr); 1118 PetscFunctionReturn(0); 1119 } 1120 1121 #if !defined(PETSC_USE_COMPLEX) 1122 /* 1123 input: 1124 F: numeric factor 1125 output: 1126 nneg: total number of negative pivots 1127 nzero: total number of zero pivots 1128 npos: (global dimension of F) - nneg - nzero 1129 */ 1130 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos) 1131 { 1132 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1133 PetscErrorCode ierr; 1134 PetscMPIInt size; 1135 1136 PetscFunctionBegin; 1137 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRQ(ierr); 1138 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1139 if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",mumps->id.INFOG(13)); 1140 1141 if (nneg) *nneg = mumps->id.INFOG(12); 1142 if (nzero || npos) { 1143 if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1144 if (nzero) *nzero = mumps->id.INFOG(28); 1145 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1146 } 1147 PetscFunctionReturn(0); 1148 } 1149 #endif 1150 1151 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps) 1152 { 1153 PetscErrorCode ierr; 1154 PetscInt i,nz=0,*irn,*jcn=0; 1155 PetscScalar *val=0; 1156 PetscMPIInt mpinz,*recvcount=NULL,*displs=NULL; 1157 1158 PetscFunctionBegin; 1159 if (mumps->omp_comm_size > 1) { 1160 if (reuse == MAT_INITIAL_MATRIX) { 1161 /* master first gathers counts of nonzeros to receive */ 1162 if (mumps->is_omp_master) { ierr = PetscMalloc2(mumps->omp_comm_size,&recvcount,mumps->omp_comm_size,&displs);CHKERRQ(ierr); } 1163 ierr = PetscMPIIntCast(mumps->nz,&mpinz);CHKERRQ(ierr); 1164 ierr = MPI_Gather(&mpinz,1,MPI_INT,recvcount,1,MPI_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1165 1166 /* master allocates memory to receive nonzeros */ 1167 if (mumps->is_omp_master) { 1168 displs[0] = 0; 1169 for (i=1; i<mumps->omp_comm_size; i++) displs[i] = displs[i-1] + recvcount[i-1]; 1170 nz = displs[mumps->omp_comm_size-1] + recvcount[mumps->omp_comm_size-1]; 1171 ierr = PetscMalloc(2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar),&irn);CHKERRQ(ierr); 1172 jcn = irn + nz; 1173 val = (PetscScalar*)(jcn + nz); 1174 } 1175 1176 /* save the gatherv plan */ 1177 mumps->mpinz = mpinz; /* used as send count */ 1178 mumps->recvcount = recvcount; 1179 mumps->displs = displs; 1180 1181 /* master gathers nonzeros */ 1182 ierr = MPI_Gatherv(mumps->irn,mpinz,MPIU_INT,irn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1183 ierr = MPI_Gatherv(mumps->jcn,mpinz,MPIU_INT,jcn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1184 ierr = MPI_Gatherv(mumps->val,mpinz,MPIU_SCALAR,val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1185 1186 /* master frees its row/col/val and replaces them with bigger arrays */ 1187 if (mumps->is_omp_master) { 1188 ierr = PetscFree(mumps->irn);CHKERRQ(ierr); /* irn/jcn/val are allocated together so free only irn */ 1189 mumps->nz = nz; /* it is a sum of mpinz over omp_comm */ 1190 mumps->irn = irn; 1191 mumps->jcn = jcn; 1192 mumps->val = val; 1193 } 1194 } else { 1195 ierr = MPI_Gatherv((mumps->is_omp_master?MPI_IN_PLACE:mumps->val),mumps->mpinz,MPIU_SCALAR,mumps->val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1196 } 1197 } 1198 PetscFunctionReturn(0); 1199 } 1200 1201 PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info) 1202 { 1203 Mat_MUMPS *mumps =(Mat_MUMPS*)(F)->data; 1204 PetscErrorCode ierr; 1205 PetscBool isMPIAIJ; 1206 1207 PetscFunctionBegin; 1208 if (mumps->id.INFOG(1) < 0) { 1209 if (mumps->id.INFOG(1) == -6) { 1210 ierr = PetscInfo2(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1211 } 1212 ierr = PetscInfo2(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1213 PetscFunctionReturn(0); 1214 } 1215 1216 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1217 ierr = MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);CHKERRQ(ierr); 1218 1219 /* numerical factorization phase */ 1220 /*-------------------------------*/ 1221 mumps->id.job = JOB_FACTNUMERIC; 1222 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1223 if (!mumps->myid) { 1224 mumps->id.a = (MumpsScalar*)mumps->val; 1225 } 1226 } else { 1227 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1228 } 1229 PetscMUMPS_c(mumps); 1230 if (mumps->id.INFOG(1) < 0) { 1231 if (A->erroriffailure) { 1232 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 1233 } else { 1234 if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */ 1235 ierr = PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1236 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1237 } else if (mumps->id.INFOG(1) == -13) { 1238 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1239 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1240 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10) ) { 1241 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray \n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1242 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1243 } else { 1244 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1245 F->factorerrortype = MAT_FACTOR_OTHER; 1246 } 1247 } 1248 } 1249 if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d\n",mumps->id.INFOG(16)); 1250 1251 F->assembled = PETSC_TRUE; 1252 mumps->matstruc = SAME_NONZERO_PATTERN; 1253 if (F->schur) { /* reset Schur status to unfactored */ 1254 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1255 mumps->id.ICNTL(19) = 2; 1256 ierr = MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);CHKERRQ(ierr); 1257 } 1258 ierr = MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);CHKERRQ(ierr); 1259 } 1260 1261 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 1262 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 1263 1264 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 1265 if (mumps->petsc_size > 1) { 1266 PetscInt lsol_loc; 1267 PetscScalar *sol_loc; 1268 1269 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr); 1270 1271 /* distributed solution; Create x_seq=sol_loc for repeated use */ 1272 if (mumps->x_seq) { 1273 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 1274 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 1275 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 1276 } 1277 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1278 ierr = PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);CHKERRQ(ierr); 1279 mumps->id.lsol_loc = lsol_loc; 1280 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1281 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr); 1282 } 1283 ierr = PetscLogFlops(mumps->id.RINFO(2));CHKERRQ(ierr); 1284 PetscFunctionReturn(0); 1285 } 1286 1287 /* Sets MUMPS options from the options database */ 1288 PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A) 1289 { 1290 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1291 PetscErrorCode ierr; 1292 PetscInt icntl,info[80],i,ninfo=80; 1293 PetscBool flg; 1294 1295 PetscFunctionBegin; 1296 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 1297 ierr = PetscOptionsInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr); 1298 if (flg) mumps->id.ICNTL(1) = icntl; 1299 ierr = PetscOptionsInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr); 1300 if (flg) mumps->id.ICNTL(2) = icntl; 1301 ierr = PetscOptionsInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr); 1302 if (flg) mumps->id.ICNTL(3) = icntl; 1303 1304 ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr); 1305 if (flg) mumps->id.ICNTL(4) = icntl; 1306 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 1307 1308 ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr); 1309 if (flg) mumps->id.ICNTL(6) = icntl; 1310 1311 ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr); 1312 if (flg) { 1313 if (icntl== 1 && mumps->petsc_size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n"); 1314 else mumps->id.ICNTL(7) = icntl; 1315 } 1316 1317 ierr = PetscOptionsInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr); 1318 /* ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL);CHKERRQ(ierr); handled by MatSolveTranspose_MUMPS() */ 1319 ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr); 1320 ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr); 1321 ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr); 1322 ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr); 1323 ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr); 1324 ierr = PetscOptionsInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr); 1325 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 1326 ierr = MatDestroy(&F->schur);CHKERRQ(ierr); 1327 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 1328 } 1329 /* ierr = PetscOptionsInt("-mat_mumps_icntl_20","ICNTL(20): the format (dense or sparse) of the right-hand sides","None",mumps->id.ICNTL(20),&mumps->id.ICNTL(20),NULL);CHKERRQ(ierr); -- sparse rhs is not supported in PETSc API */ 1330 /* ierr = PetscOptionsInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL);CHKERRQ(ierr); we only use distributed solution vector */ 1331 1332 ierr = PetscOptionsInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr); 1333 ierr = PetscOptionsInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr); 1334 ierr = PetscOptionsInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr); 1335 if (mumps->id.ICNTL(24)) { 1336 mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ 1337 } 1338 1339 ierr = PetscOptionsInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr); 1340 ierr = PetscOptionsInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr); 1341 ierr = PetscOptionsInt("-mat_mumps_icntl_27","ICNTL(27): the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr); 1342 ierr = PetscOptionsInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr); 1343 ierr = PetscOptionsInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr); 1344 /* ierr = PetscOptionsInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */ 1345 ierr = PetscOptionsInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr); 1346 /* ierr = PetscOptionsInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr); -- not supported by PETSc API */ 1347 ierr = PetscOptionsInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr); 1348 ierr = PetscOptionsInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr); 1349 ierr = PetscOptionsInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);CHKERRQ(ierr); 1350 ierr = PetscOptionsInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);CHKERRQ(ierr); 1351 1352 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr); 1353 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr); 1354 ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr); 1355 ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr); 1356 ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr); 1357 ierr = PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);CHKERRQ(ierr); 1358 1359 ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, 256, NULL);CHKERRQ(ierr); 1360 1361 ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr); 1362 if (ninfo) { 1363 if (ninfo > 80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %d must <= 80\n",ninfo); 1364 ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr); 1365 mumps->ninfo = ninfo; 1366 for (i=0; i<ninfo; i++) { 1367 if (info[i] < 0 || info[i]>80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %d must between 1 and 80\n",ninfo); 1368 else mumps->info[i] = info[i]; 1369 } 1370 } 1371 1372 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1373 PetscFunctionReturn(0); 1374 } 1375 1376 PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps) 1377 { 1378 PetscErrorCode ierr; 1379 PetscInt nthreads=0; 1380 1381 PetscFunctionBegin; 1382 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1383 ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRQ(ierr); 1384 ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRQ(ierr); /* so that code like "if (!myid)" still works even if mumps_comm is different */ 1385 1386 ierr = PetscOptionsHasName(NULL,NULL,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr); 1387 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1388 ierr = PetscOptionsGetInt(NULL,NULL,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr); 1389 if (mumps->use_petsc_omp_support) { 1390 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1391 ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr); 1392 ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr); 1393 #else 1394 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -mat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual\n"); 1395 #endif 1396 } else { 1397 mumps->omp_comm = PETSC_COMM_SELF; 1398 mumps->mumps_comm = mumps->petsc_comm; 1399 mumps->is_omp_master = PETSC_TRUE; 1400 } 1401 ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRQ(ierr); 1402 1403 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 1404 mumps->id.job = JOB_INIT; 1405 mumps->id.par = 1; /* host participates factorizaton and solve */ 1406 mumps->id.sym = mumps->sym; 1407 1408 PetscMUMPS_c(mumps); 1409 1410 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 1411 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 1412 */ 1413 ierr = MPI_Bcast(mumps->id.icntl,60,MPIU_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 Manual Section 9 */ 1414 ierr = MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); 1415 1416 mumps->scat_rhs = NULL; 1417 mumps->scat_sol = NULL; 1418 1419 /* set PETSc-MUMPS default options - override MUMPS default */ 1420 mumps->id.ICNTL(3) = 0; 1421 mumps->id.ICNTL(4) = 0; 1422 if (mumps->petsc_size == 1) { 1423 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 1424 } else { 1425 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 1426 mumps->id.ICNTL(20) = 0; /* rhs is in dense format */ 1427 mumps->id.ICNTL(21) = 1; /* distributed solution */ 1428 } 1429 1430 /* schur */ 1431 mumps->id.size_schur = 0; 1432 mumps->id.listvar_schur = NULL; 1433 mumps->id.schur = NULL; 1434 mumps->sizeredrhs = 0; 1435 mumps->schur_sol = NULL; 1436 mumps->schur_sizesol = 0; 1437 PetscFunctionReturn(0); 1438 } 1439 1440 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps) 1441 { 1442 PetscErrorCode ierr; 1443 1444 PetscFunctionBegin; 1445 ierr = MPI_Bcast(mumps->id.infog, 80,MPIU_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 manual p82 */ 1446 ierr = MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); 1447 if (mumps->id.INFOG(1) < 0) { 1448 if (A->erroriffailure) { 1449 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1450 } else { 1451 if (mumps->id.INFOG(1) == -6) { 1452 ierr = PetscInfo2(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1453 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 1454 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 1455 ierr = PetscInfo2(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1456 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1457 } else { 1458 ierr = PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1459 F->factorerrortype = MAT_FACTOR_OTHER; 1460 } 1461 } 1462 } 1463 PetscFunctionReturn(0); 1464 } 1465 1466 /* Note Petsc r(=c) permutation is used when mumps->id.ICNTL(7)==1 with centralized assembled matrix input; otherwise r and c are ignored */ 1467 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1468 { 1469 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1470 PetscErrorCode ierr; 1471 Vec b; 1472 const PetscInt M = A->rmap->N; 1473 1474 PetscFunctionBegin; 1475 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1476 1477 /* Set MUMPS options from the options database */ 1478 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1479 1480 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1481 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1482 1483 /* analysis phase */ 1484 /*----------------*/ 1485 mumps->id.job = JOB_FACTSYMBOLIC; 1486 mumps->id.n = M; 1487 switch (mumps->id.ICNTL(18)) { 1488 case 0: /* centralized assembled matrix input */ 1489 if (!mumps->myid) { 1490 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1491 if (mumps->id.ICNTL(6)>1) { 1492 mumps->id.a = (MumpsScalar*)mumps->val; 1493 } 1494 if (mumps->id.ICNTL(7) == 1) { /* use user-provide matrix ordering - assuming r = c ordering */ 1495 /* 1496 PetscBool flag; 1497 ierr = ISEqual(r,c,&flag);CHKERRQ(ierr); 1498 if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"row_perm != col_perm"); 1499 ierr = ISView(r,PETSC_VIEWER_STDOUT_SELF); 1500 */ 1501 if (!mumps->myid) { 1502 const PetscInt *idx; 1503 PetscInt i,*perm_in; 1504 1505 ierr = PetscMalloc1(M,&perm_in);CHKERRQ(ierr); 1506 ierr = ISGetIndices(r,&idx);CHKERRQ(ierr); 1507 1508 mumps->id.perm_in = perm_in; 1509 for (i=0; i<M; i++) perm_in[i] = idx[i]+1; /* perm_in[]: start from 1, not 0! */ 1510 ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr); 1511 } 1512 } 1513 } 1514 break; 1515 case 3: /* distributed assembled matrix input (size>1) */ 1516 mumps->id.nz_loc = mumps->nz; 1517 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1518 if (mumps->id.ICNTL(6)>1) { 1519 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1520 } 1521 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1522 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1523 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1524 ierr = VecDestroy(&b);CHKERRQ(ierr); 1525 break; 1526 } 1527 PetscMUMPS_c(mumps); 1528 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1529 1530 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1531 F->ops->solve = MatSolve_MUMPS; 1532 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1533 F->ops->matsolve = MatMatSolve_MUMPS; 1534 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1535 PetscFunctionReturn(0); 1536 } 1537 1538 /* Note the Petsc r and c permutations are ignored */ 1539 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1540 { 1541 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1542 PetscErrorCode ierr; 1543 Vec b; 1544 const PetscInt M = A->rmap->N; 1545 1546 PetscFunctionBegin; 1547 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1548 1549 /* Set MUMPS options from the options database */ 1550 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1551 1552 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1553 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1554 1555 /* analysis phase */ 1556 /*----------------*/ 1557 mumps->id.job = JOB_FACTSYMBOLIC; 1558 mumps->id.n = M; 1559 switch (mumps->id.ICNTL(18)) { 1560 case 0: /* centralized assembled matrix input */ 1561 if (!mumps->myid) { 1562 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1563 if (mumps->id.ICNTL(6)>1) { 1564 mumps->id.a = (MumpsScalar*)mumps->val; 1565 } 1566 } 1567 break; 1568 case 3: /* distributed assembled matrix input (size>1) */ 1569 mumps->id.nz_loc = mumps->nz; 1570 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1571 if (mumps->id.ICNTL(6)>1) { 1572 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1573 } 1574 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1575 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1576 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1577 ierr = VecDestroy(&b);CHKERRQ(ierr); 1578 break; 1579 } 1580 PetscMUMPS_c(mumps); 1581 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1582 1583 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1584 F->ops->solve = MatSolve_MUMPS; 1585 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1586 PetscFunctionReturn(0); 1587 } 1588 1589 /* Note the Petsc r permutation and factor info are ignored */ 1590 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info) 1591 { 1592 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1593 PetscErrorCode ierr; 1594 Vec b; 1595 const PetscInt M = A->rmap->N; 1596 1597 PetscFunctionBegin; 1598 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1599 1600 /* Set MUMPS options from the options database */ 1601 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1602 1603 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1604 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1605 1606 /* analysis phase */ 1607 /*----------------*/ 1608 mumps->id.job = JOB_FACTSYMBOLIC; 1609 mumps->id.n = M; 1610 switch (mumps->id.ICNTL(18)) { 1611 case 0: /* centralized assembled matrix input */ 1612 if (!mumps->myid) { 1613 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1614 if (mumps->id.ICNTL(6)>1) { 1615 mumps->id.a = (MumpsScalar*)mumps->val; 1616 } 1617 } 1618 break; 1619 case 3: /* distributed assembled matrix input (size>1) */ 1620 mumps->id.nz_loc = mumps->nz; 1621 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1622 if (mumps->id.ICNTL(6)>1) { 1623 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1624 } 1625 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1626 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1627 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1628 ierr = VecDestroy(&b);CHKERRQ(ierr); 1629 break; 1630 } 1631 PetscMUMPS_c(mumps); 1632 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1633 1634 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 1635 F->ops->solve = MatSolve_MUMPS; 1636 F->ops->solvetranspose = MatSolve_MUMPS; 1637 F->ops->matsolve = MatMatSolve_MUMPS; 1638 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1639 #if defined(PETSC_USE_COMPLEX) 1640 F->ops->getinertia = NULL; 1641 #else 1642 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 1643 #endif 1644 PetscFunctionReturn(0); 1645 } 1646 1647 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) 1648 { 1649 PetscErrorCode ierr; 1650 PetscBool iascii; 1651 PetscViewerFormat format; 1652 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1653 1654 PetscFunctionBegin; 1655 /* check if matrix is mumps type */ 1656 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0); 1657 1658 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1659 if (iascii) { 1660 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1661 if (format == PETSC_VIEWER_ASCII_INFO) { 1662 ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr); 1663 ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",mumps->id.sym);CHKERRQ(ierr); 1664 ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",mumps->id.par);CHKERRQ(ierr); 1665 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d \n",mumps->id.ICNTL(1));CHKERRQ(ierr); 1666 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d \n",mumps->id.ICNTL(2));CHKERRQ(ierr); 1667 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d \n",mumps->id.ICNTL(3));CHKERRQ(ierr); 1668 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",mumps->id.ICNTL(4));CHKERRQ(ierr); 1669 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",mumps->id.ICNTL(5));CHKERRQ(ierr); 1670 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",mumps->id.ICNTL(6));CHKERRQ(ierr); 1671 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d \n",mumps->id.ICNTL(7));CHKERRQ(ierr); 1672 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d \n",mumps->id.ICNTL(8));CHKERRQ(ierr); 1673 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",mumps->id.ICNTL(10));CHKERRQ(ierr); 1674 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",mumps->id.ICNTL(11));CHKERRQ(ierr); 1675 if (mumps->id.ICNTL(11)>0) { 1676 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr); 1677 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr); 1678 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6));CHKERRQ(ierr); 1679 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr); 1680 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr); 1681 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr); 1682 } 1683 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",mumps->id.ICNTL(12));CHKERRQ(ierr); 1684 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (efficiency control): %d \n",mumps->id.ICNTL(13));CHKERRQ(ierr); 1685 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",mumps->id.ICNTL(14));CHKERRQ(ierr); 1686 /* ICNTL(15-17) not used */ 1687 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",mumps->id.ICNTL(18));CHKERRQ(ierr); 1688 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d \n",mumps->id.ICNTL(19));CHKERRQ(ierr); 1689 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(20) (rhs sparse pattern): %d \n",mumps->id.ICNTL(20));CHKERRQ(ierr); 1690 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d \n",mumps->id.ICNTL(21));CHKERRQ(ierr); 1691 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d \n",mumps->id.ICNTL(22));CHKERRQ(ierr); 1692 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory can be allocated locally):%d \n",mumps->id.ICNTL(23));CHKERRQ(ierr); 1693 1694 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d \n",mumps->id.ICNTL(24));CHKERRQ(ierr); 1695 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d \n",mumps->id.ICNTL(25));CHKERRQ(ierr); 1696 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for rhs or solution): %d \n",mumps->id.ICNTL(26));CHKERRQ(ierr); 1697 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(27) (experimental parameter): %d \n",mumps->id.ICNTL(27));CHKERRQ(ierr); 1698 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d \n",mumps->id.ICNTL(28));CHKERRQ(ierr); 1699 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d \n",mumps->id.ICNTL(29));CHKERRQ(ierr); 1700 1701 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d \n",mumps->id.ICNTL(30));CHKERRQ(ierr); 1702 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(31) (factors is discarded in the solve phase): %d \n",mumps->id.ICNTL(31));CHKERRQ(ierr); 1703 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d \n",mumps->id.ICNTL(33));CHKERRQ(ierr); 1704 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d \n",mumps->id.ICNTL(35));CHKERRQ(ierr); 1705 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(36) (choice of BLR factorization variant): %d \n",mumps->id.ICNTL(36));CHKERRQ(ierr); 1706 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(38) (estimated compression rate of LU factors): %d \n",mumps->id.ICNTL(38));CHKERRQ(ierr); 1707 1708 ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1));CHKERRQ(ierr); 1709 ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr); 1710 ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3));CHKERRQ(ierr); 1711 ierr = PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4));CHKERRQ(ierr); 1712 ierr = PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5));CHKERRQ(ierr); 1713 ierr = PetscViewerASCIIPrintf(viewer," CNTL(7) (dropping parameter for BLR): %g \n",mumps->id.CNTL(7));CHKERRQ(ierr); 1714 1715 /* infomation local to each processor */ 1716 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr); 1717 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1718 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr); 1719 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1720 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr); 1721 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr); 1722 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1723 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr); 1724 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr); 1725 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1726 1727 ierr = PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr); 1728 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr); 1729 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1730 1731 ierr = PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr); 1732 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr); 1733 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1734 1735 ierr = PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr); 1736 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr); 1737 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1738 1739 if (mumps->ninfo && mumps->ninfo <= 80){ 1740 PetscInt i; 1741 for (i=0; i<mumps->ninfo; i++){ 1742 ierr = PetscViewerASCIIPrintf(viewer, " INFO(%d): \n",mumps->info[i]);CHKERRQ(ierr); 1743 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(mumps->info[i]));CHKERRQ(ierr); 1744 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1745 } 1746 } 1747 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1748 1749 if (!mumps->myid) { /* information from the host */ 1750 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr); 1751 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr); 1752 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr); 1753 ierr = PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr); 1754 1755 ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(3));CHKERRQ(ierr); 1756 ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(4));CHKERRQ(ierr); 1757 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",mumps->id.INFOG(5));CHKERRQ(ierr); 1758 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",mumps->id.INFOG(6));CHKERRQ(ierr); 1759 ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively use after analysis): %d \n",mumps->id.INFOG(7));CHKERRQ(ierr); 1760 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",mumps->id.INFOG(8));CHKERRQ(ierr); 1761 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",mumps->id.INFOG(9));CHKERRQ(ierr); 1762 ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d \n",mumps->id.INFOG(10));CHKERRQ(ierr); 1763 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d \n",mumps->id.INFOG(11));CHKERRQ(ierr); 1764 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",mumps->id.INFOG(12));CHKERRQ(ierr); 1765 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",mumps->id.INFOG(13));CHKERRQ(ierr); 1766 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",mumps->id.INFOG(14));CHKERRQ(ierr); 1767 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",mumps->id.INFOG(15));CHKERRQ(ierr); 1768 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",mumps->id.INFOG(16));CHKERRQ(ierr); 1769 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",mumps->id.INFOG(17));CHKERRQ(ierr); 1770 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",mumps->id.INFOG(18));CHKERRQ(ierr); 1771 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",mumps->id.INFOG(19));CHKERRQ(ierr); 1772 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",mumps->id.INFOG(20));CHKERRQ(ierr); 1773 ierr = PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",mumps->id.INFOG(21));CHKERRQ(ierr); 1774 ierr = PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",mumps->id.INFOG(22));CHKERRQ(ierr); 1775 ierr = PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",mumps->id.INFOG(23));CHKERRQ(ierr); 1776 ierr = PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",mumps->id.INFOG(24));CHKERRQ(ierr); 1777 ierr = PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",mumps->id.INFOG(25));CHKERRQ(ierr); 1778 ierr = PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));CHKERRQ(ierr); 1779 ierr = PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));CHKERRQ(ierr); 1780 ierr = PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));CHKERRQ(ierr); 1781 ierr = PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));CHKERRQ(ierr); 1782 ierr = PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));CHKERRQ(ierr); 1783 ierr = PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));CHKERRQ(ierr); 1784 ierr = PetscViewerASCIIPrintf(viewer," INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));CHKERRQ(ierr); 1785 ierr = PetscViewerASCIIPrintf(viewer," INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(36));CHKERRQ(ierr); 1786 ierr = PetscViewerASCIIPrintf(viewer," INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d \n",mumps->id.INFOG(37));CHKERRQ(ierr); 1787 ierr = PetscViewerASCIIPrintf(viewer," INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(38));CHKERRQ(ierr); 1788 ierr = PetscViewerASCIIPrintf(viewer," INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d \n",mumps->id.INFOG(39));CHKERRQ(ierr); 1789 } 1790 } 1791 } 1792 PetscFunctionReturn(0); 1793 } 1794 1795 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info) 1796 { 1797 Mat_MUMPS *mumps =(Mat_MUMPS*)A->data; 1798 1799 PetscFunctionBegin; 1800 info->block_size = 1.0; 1801 info->nz_allocated = mumps->id.INFOG(20); 1802 info->nz_used = mumps->id.INFOG(20); 1803 info->nz_unneeded = 0.0; 1804 info->assemblies = 0.0; 1805 info->mallocs = 0.0; 1806 info->memory = 0.0; 1807 info->fill_ratio_given = 0; 1808 info->fill_ratio_needed = 0; 1809 info->factor_mallocs = 0; 1810 PetscFunctionReturn(0); 1811 } 1812 1813 /* -------------------------------------------------------------------------------------------*/ 1814 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 1815 { 1816 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1817 const PetscInt *idxs; 1818 PetscInt size,i; 1819 PetscErrorCode ierr; 1820 1821 PetscFunctionBegin; 1822 ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr); 1823 if (mumps->petsc_size > 1) { 1824 PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */ 1825 1826 ls = mumps->myid ? (size ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 1827 ierr = MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);CHKERRQ(ierr); 1828 if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc\n"); 1829 } 1830 if (mumps->id.size_schur != size) { 1831 ierr = PetscFree2(mumps->id.listvar_schur,mumps->id.schur);CHKERRQ(ierr); 1832 mumps->id.size_schur = size; 1833 mumps->id.schur_lld = size; 1834 ierr = PetscMalloc2(size,&mumps->id.listvar_schur,size*size,&mumps->id.schur);CHKERRQ(ierr); 1835 } 1836 1837 /* Schur complement matrix */ 1838 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.size_schur,(PetscScalar*)mumps->id.schur,&F->schur);CHKERRQ(ierr); 1839 if (mumps->sym == 1) { 1840 ierr = MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 1841 } 1842 1843 /* MUMPS expects Fortran style indices */ 1844 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 1845 ierr = PetscArraycpy(mumps->id.listvar_schur,idxs,size);CHKERRQ(ierr); 1846 for (i=0;i<size;i++) mumps->id.listvar_schur[i]++; 1847 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 1848 if (mumps->petsc_size > 1) { 1849 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 1850 } else { 1851 if (F->factortype == MAT_FACTOR_LU) { 1852 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 1853 } else { 1854 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 1855 } 1856 } 1857 /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 1858 mumps->id.ICNTL(26) = -1; 1859 PetscFunctionReturn(0); 1860 } 1861 1862 /* -------------------------------------------------------------------------------------------*/ 1863 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S) 1864 { 1865 Mat St; 1866 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1867 PetscScalar *array; 1868 #if defined(PETSC_USE_COMPLEX) 1869 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 1870 #endif 1871 PetscErrorCode ierr; 1872 1873 PetscFunctionBegin; 1874 if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 1875 ierr = MatCreate(PETSC_COMM_SELF,&St);CHKERRQ(ierr); 1876 ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);CHKERRQ(ierr); 1877 ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr); 1878 ierr = MatSetUp(St);CHKERRQ(ierr); 1879 ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr); 1880 if (!mumps->sym) { /* MUMPS always return a full matrix */ 1881 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1882 PetscInt i,j,N=mumps->id.size_schur; 1883 for (i=0;i<N;i++) { 1884 for (j=0;j<N;j++) { 1885 #if !defined(PETSC_USE_COMPLEX) 1886 PetscScalar val = mumps->id.schur[i*N+j]; 1887 #else 1888 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 1889 #endif 1890 array[j*N+i] = val; 1891 } 1892 } 1893 } else { /* stored by columns */ 1894 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 1895 } 1896 } else { /* either full or lower-triangular (not packed) */ 1897 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 1898 PetscInt i,j,N=mumps->id.size_schur; 1899 for (i=0;i<N;i++) { 1900 for (j=i;j<N;j++) { 1901 #if !defined(PETSC_USE_COMPLEX) 1902 PetscScalar val = mumps->id.schur[i*N+j]; 1903 #else 1904 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 1905 #endif 1906 array[i*N+j] = val; 1907 array[j*N+i] = val; 1908 } 1909 } 1910 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 1911 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 1912 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 1913 PetscInt i,j,N=mumps->id.size_schur; 1914 for (i=0;i<N;i++) { 1915 for (j=0;j<i+1;j++) { 1916 #if !defined(PETSC_USE_COMPLEX) 1917 PetscScalar val = mumps->id.schur[i*N+j]; 1918 #else 1919 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 1920 #endif 1921 array[i*N+j] = val; 1922 array[j*N+i] = val; 1923 } 1924 } 1925 } 1926 } 1927 ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr); 1928 *S = St; 1929 PetscFunctionReturn(0); 1930 } 1931 1932 /* -------------------------------------------------------------------------------------------*/ 1933 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival) 1934 { 1935 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1936 1937 PetscFunctionBegin; 1938 mumps->id.ICNTL(icntl) = ival; 1939 PetscFunctionReturn(0); 1940 } 1941 1942 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival) 1943 { 1944 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1945 1946 PetscFunctionBegin; 1947 *ival = mumps->id.ICNTL(icntl); 1948 PetscFunctionReturn(0); 1949 } 1950 1951 /*@ 1952 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 1953 1954 Logically Collective on Mat 1955 1956 Input Parameters: 1957 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 1958 . icntl - index of MUMPS parameter array ICNTL() 1959 - ival - value of MUMPS ICNTL(icntl) 1960 1961 Options Database: 1962 . -mat_mumps_icntl_<icntl> <ival> 1963 1964 Level: beginner 1965 1966 References: 1967 . MUMPS Users' Guide 1968 1969 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 1970 @*/ 1971 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival) 1972 { 1973 PetscErrorCode ierr; 1974 1975 PetscFunctionBegin; 1976 PetscValidType(F,1); 1977 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 1978 PetscValidLogicalCollectiveInt(F,icntl,2); 1979 PetscValidLogicalCollectiveInt(F,ival,3); 1980 ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr); 1981 PetscFunctionReturn(0); 1982 } 1983 1984 /*@ 1985 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 1986 1987 Logically Collective on Mat 1988 1989 Input Parameters: 1990 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 1991 - icntl - index of MUMPS parameter array ICNTL() 1992 1993 Output Parameter: 1994 . ival - value of MUMPS ICNTL(icntl) 1995 1996 Level: beginner 1997 1998 References: 1999 . MUMPS Users' Guide 2000 2001 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2002 @*/ 2003 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival) 2004 { 2005 PetscErrorCode ierr; 2006 2007 PetscFunctionBegin; 2008 PetscValidType(F,1); 2009 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2010 PetscValidLogicalCollectiveInt(F,icntl,2); 2011 PetscValidIntPointer(ival,3); 2012 ierr = PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2013 PetscFunctionReturn(0); 2014 } 2015 2016 /* -------------------------------------------------------------------------------------------*/ 2017 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val) 2018 { 2019 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2020 2021 PetscFunctionBegin; 2022 mumps->id.CNTL(icntl) = val; 2023 PetscFunctionReturn(0); 2024 } 2025 2026 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val) 2027 { 2028 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2029 2030 PetscFunctionBegin; 2031 *val = mumps->id.CNTL(icntl); 2032 PetscFunctionReturn(0); 2033 } 2034 2035 /*@ 2036 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2037 2038 Logically Collective on Mat 2039 2040 Input Parameters: 2041 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2042 . icntl - index of MUMPS parameter array CNTL() 2043 - val - value of MUMPS CNTL(icntl) 2044 2045 Options Database: 2046 . -mat_mumps_cntl_<icntl> <val> 2047 2048 Level: beginner 2049 2050 References: 2051 . MUMPS Users' Guide 2052 2053 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2054 @*/ 2055 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val) 2056 { 2057 PetscErrorCode ierr; 2058 2059 PetscFunctionBegin; 2060 PetscValidType(F,1); 2061 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2062 PetscValidLogicalCollectiveInt(F,icntl,2); 2063 PetscValidLogicalCollectiveReal(F,val,3); 2064 ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr); 2065 PetscFunctionReturn(0); 2066 } 2067 2068 /*@ 2069 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2070 2071 Logically Collective on Mat 2072 2073 Input Parameters: 2074 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2075 - icntl - index of MUMPS parameter array CNTL() 2076 2077 Output Parameter: 2078 . val - value of MUMPS CNTL(icntl) 2079 2080 Level: beginner 2081 2082 References: 2083 . MUMPS Users' Guide 2084 2085 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2086 @*/ 2087 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val) 2088 { 2089 PetscErrorCode ierr; 2090 2091 PetscFunctionBegin; 2092 PetscValidType(F,1); 2093 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2094 PetscValidLogicalCollectiveInt(F,icntl,2); 2095 PetscValidRealPointer(val,3); 2096 ierr = PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2097 PetscFunctionReturn(0); 2098 } 2099 2100 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info) 2101 { 2102 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2103 2104 PetscFunctionBegin; 2105 *info = mumps->id.INFO(icntl); 2106 PetscFunctionReturn(0); 2107 } 2108 2109 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog) 2110 { 2111 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2112 2113 PetscFunctionBegin; 2114 *infog = mumps->id.INFOG(icntl); 2115 PetscFunctionReturn(0); 2116 } 2117 2118 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo) 2119 { 2120 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2121 2122 PetscFunctionBegin; 2123 *rinfo = mumps->id.RINFO(icntl); 2124 PetscFunctionReturn(0); 2125 } 2126 2127 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog) 2128 { 2129 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2130 2131 PetscFunctionBegin; 2132 *rinfog = mumps->id.RINFOG(icntl); 2133 PetscFunctionReturn(0); 2134 } 2135 2136 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS) 2137 { 2138 PetscErrorCode ierr; 2139 Mat Bt = NULL,Btseq = NULL; 2140 PetscBool flg; 2141 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2142 PetscScalar *aa; 2143 PetscInt spnr,*ia,*ja; 2144 2145 PetscFunctionBegin; 2146 PetscValidIntPointer(spRHS,2); 2147 ierr = PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);CHKERRQ(ierr); 2148 if (flg) { 2149 ierr = MatTransposeGetMat(spRHS,&Bt);CHKERRQ(ierr); 2150 } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be type MATTRANSPOSEMAT matrix"); 2151 2152 ierr = MatMumpsSetIcntl(F,30,1);CHKERRQ(ierr); 2153 2154 if (mumps->petsc_size > 1) { 2155 Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data; 2156 Btseq = b->A; 2157 } else { 2158 Btseq = Bt; 2159 } 2160 2161 if (!mumps->myid) { 2162 ierr = MatSeqAIJGetArray(Btseq,&aa);CHKERRQ(ierr); 2163 ierr = MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2164 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2165 2166 mumps->id.irhs_ptr = ia; 2167 mumps->id.irhs_sparse = ja; 2168 mumps->id.nz_rhs = ia[spnr] - 1; 2169 mumps->id.rhs_sparse = (MumpsScalar*)aa; 2170 } else { 2171 mumps->id.irhs_ptr = NULL; 2172 mumps->id.irhs_sparse = NULL; 2173 mumps->id.nz_rhs = 0; 2174 mumps->id.rhs_sparse = NULL; 2175 } 2176 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2177 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2178 2179 /* solve phase */ 2180 /*-------------*/ 2181 mumps->id.job = JOB_SOLVE; 2182 PetscMUMPS_c(mumps); 2183 if (mumps->id.INFOG(1) < 0) 2184 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 2185 2186 if (!mumps->myid) { 2187 ierr = MatSeqAIJRestoreArray(Btseq,&aa);CHKERRQ(ierr); 2188 ierr = MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2189 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2190 } 2191 PetscFunctionReturn(0); 2192 } 2193 2194 /*@ 2195 MatMumpsGetInverse - Get user-specified set of entries in inverse of A 2196 2197 Logically Collective on Mat 2198 2199 Input Parameters: 2200 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2201 - spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0] 2202 2203 Output Parameter: 2204 . spRHS - requested entries of inverse of A 2205 2206 Level: beginner 2207 2208 References: 2209 . MUMPS Users' Guide 2210 2211 .seealso: MatGetFactor(), MatCreateTranspose() 2212 @*/ 2213 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS) 2214 { 2215 PetscErrorCode ierr; 2216 2217 PetscFunctionBegin; 2218 PetscValidType(F,1); 2219 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2220 ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr); 2221 PetscFunctionReturn(0); 2222 } 2223 2224 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST) 2225 { 2226 PetscErrorCode ierr; 2227 Mat spRHS; 2228 2229 PetscFunctionBegin; 2230 ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr); 2231 ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr); 2232 ierr = MatDestroy(&spRHS);CHKERRQ(ierr); 2233 PetscFunctionReturn(0); 2234 } 2235 2236 /*@ 2237 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T 2238 2239 Logically Collective on Mat 2240 2241 Input Parameters: 2242 + F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface 2243 - spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0] 2244 2245 Output Parameter: 2246 . spRHST - requested entries of inverse of A^T 2247 2248 Level: beginner 2249 2250 References: 2251 . MUMPS Users' Guide 2252 2253 .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse() 2254 @*/ 2255 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST) 2256 { 2257 PetscErrorCode ierr; 2258 PetscBool flg; 2259 2260 PetscFunctionBegin; 2261 PetscValidType(F,1); 2262 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2263 ierr = PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 2264 if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix"); 2265 2266 ierr = PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));CHKERRQ(ierr); 2267 PetscFunctionReturn(0); 2268 } 2269 2270 /*@ 2271 MatMumpsGetInfo - Get MUMPS parameter INFO() 2272 2273 Logically Collective on Mat 2274 2275 Input Parameters: 2276 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2277 - icntl - index of MUMPS parameter array INFO() 2278 2279 Output Parameter: 2280 . ival - value of MUMPS INFO(icntl) 2281 2282 Level: beginner 2283 2284 References: 2285 . MUMPS Users' Guide 2286 2287 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2288 @*/ 2289 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival) 2290 { 2291 PetscErrorCode ierr; 2292 2293 PetscFunctionBegin; 2294 PetscValidType(F,1); 2295 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2296 PetscValidIntPointer(ival,3); 2297 ierr = PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2298 PetscFunctionReturn(0); 2299 } 2300 2301 /*@ 2302 MatMumpsGetInfog - Get MUMPS parameter INFOG() 2303 2304 Logically Collective on Mat 2305 2306 Input Parameters: 2307 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2308 - icntl - index of MUMPS parameter array INFOG() 2309 2310 Output Parameter: 2311 . ival - value of MUMPS INFOG(icntl) 2312 2313 Level: beginner 2314 2315 References: 2316 . MUMPS Users' Guide 2317 2318 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2319 @*/ 2320 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival) 2321 { 2322 PetscErrorCode ierr; 2323 2324 PetscFunctionBegin; 2325 PetscValidType(F,1); 2326 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2327 PetscValidIntPointer(ival,3); 2328 ierr = PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2329 PetscFunctionReturn(0); 2330 } 2331 2332 /*@ 2333 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 2334 2335 Logically Collective on Mat 2336 2337 Input Parameters: 2338 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2339 - icntl - index of MUMPS parameter array RINFO() 2340 2341 Output Parameter: 2342 . val - value of MUMPS RINFO(icntl) 2343 2344 Level: beginner 2345 2346 References: 2347 . MUMPS Users' Guide 2348 2349 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2350 @*/ 2351 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val) 2352 { 2353 PetscErrorCode ierr; 2354 2355 PetscFunctionBegin; 2356 PetscValidType(F,1); 2357 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2358 PetscValidRealPointer(val,3); 2359 ierr = PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2360 PetscFunctionReturn(0); 2361 } 2362 2363 /*@ 2364 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 2365 2366 Logically Collective on Mat 2367 2368 Input Parameters: 2369 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2370 - icntl - index of MUMPS parameter array RINFOG() 2371 2372 Output Parameter: 2373 . val - value of MUMPS RINFOG(icntl) 2374 2375 Level: beginner 2376 2377 References: 2378 . MUMPS Users' Guide 2379 2380 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2381 @*/ 2382 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val) 2383 { 2384 PetscErrorCode ierr; 2385 2386 PetscFunctionBegin; 2387 PetscValidType(F,1); 2388 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2389 PetscValidRealPointer(val,3); 2390 ierr = PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2391 PetscFunctionReturn(0); 2392 } 2393 2394 /*MC 2395 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 2396 distributed and sequential matrices via the external package MUMPS. 2397 2398 Works with MATAIJ and MATSBAIJ matrices 2399 2400 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 2401 2402 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below. 2403 2404 Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver 2405 2406 Options Database Keys: 2407 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 2408 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 2409 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 2410 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 2411 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 2412 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis 2413 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 2414 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 2415 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 2416 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 2417 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 2418 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 2419 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 2420 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 2421 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can allocate per processor 2422 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 2423 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 2424 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix 2425 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering 2426 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 2427 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 2428 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 2429 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 2430 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 2431 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 2432 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 2433 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 2434 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 2435 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 2436 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 2437 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 2438 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 2439 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 2440 Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 2441 2442 Level: beginner 2443 2444 Notes: 2445 When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling 2446 $ KSPGetPC(ksp,&pc); 2447 $ PCFactorGetMatrix(pc,&mat); 2448 $ MatMumpsGetInfo(mat,....); 2449 $ MatMumpsGetInfog(mat,....); etc. 2450 Or you can run with -ksp_error_if_not_converged and the program will be stopped and the information printed in the error message. 2451 2452 If you want to run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still want to run the non-MUMPS part 2453 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc 2454 (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 2455 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or open sourced 2456 OpenBLAS (PETSc has configure options to install/specify them). With these conditions met, you can run your program as before but with 2457 an extra option -mat_mumps_use_omp_threads [m]. It works as if we set OMP_NUM_THREADS=m to MUMPS, with m defaults to the number of cores 2458 per CPU socket (or package, in hwloc term), or number of PETSc MPI processes on a node, whichever is smaller. 2459 2460 By flat-MPI or pure-MPI mode, it means you run your code with as many MPI ranks as the number of cores. For example, 2461 if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test". To run MPI+OpenMP hybrid MUMPS, 2462 the tranditional way is to set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 2463 threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". The problem of this approach is that the non-MUMPS 2464 part of your code is run with fewer cores and CPUs are wasted. "-mat_mumps_use_omp_threads [m]" provides an alternative such that 2465 you can stil run your code with as many MPI ranks as the number of cores, but have MUMPS run in MPI+OpenMP hybrid mode. In our example, 2466 you can use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16". 2467 2468 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type to get MPI 2469 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 2470 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 2471 are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set 2472 by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs. 2473 In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets, 2474 if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind 2475 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half 2476 cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 2477 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 2478 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbsoe -m block:block to map consecutive MPI ranks to sockets and 2479 examine the mapping result. 2480 2481 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts, 2482 for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc 2483 calls omp_set_num_threads(m) internally before calling MUMPS. 2484 2485 References: 2486 + 1. - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 2487 - 2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017. 2488 2489 .seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCGetFactor(), PCFactorGetMatrix() 2490 2491 M*/ 2492 2493 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type) 2494 { 2495 PetscFunctionBegin; 2496 *type = MATSOLVERMUMPS; 2497 PetscFunctionReturn(0); 2498 } 2499 2500 /* MatGetFactor for Seq and MPI AIJ matrices */ 2501 static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F) 2502 { 2503 Mat B; 2504 PetscErrorCode ierr; 2505 Mat_MUMPS *mumps; 2506 PetscBool isSeqAIJ; 2507 2508 PetscFunctionBegin; 2509 /* Create the factorization matrix */ 2510 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr); 2511 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2512 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2513 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2514 ierr = MatSetUp(B);CHKERRQ(ierr); 2515 2516 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2517 2518 B->ops->view = MatView_MUMPS; 2519 B->ops->getinfo = MatGetInfo_MUMPS; 2520 2521 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2522 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2523 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2524 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2525 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2526 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2527 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2528 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2529 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2530 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2531 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2532 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2533 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2534 2535 if (ftype == MAT_FACTOR_LU) { 2536 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 2537 B->factortype = MAT_FACTOR_LU; 2538 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 2539 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 2540 mumps->sym = 0; 2541 } else { 2542 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 2543 B->factortype = MAT_FACTOR_CHOLESKY; 2544 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 2545 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 2546 #if defined(PETSC_USE_COMPLEX) 2547 mumps->sym = 2; 2548 #else 2549 if (A->spd_set && A->spd) mumps->sym = 1; 2550 else mumps->sym = 2; 2551 #endif 2552 } 2553 2554 /* set solvertype */ 2555 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2556 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2557 2558 B->ops->destroy = MatDestroy_MUMPS; 2559 B->data = (void*)mumps; 2560 2561 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2562 2563 *F = B; 2564 PetscFunctionReturn(0); 2565 } 2566 2567 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 2568 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F) 2569 { 2570 Mat B; 2571 PetscErrorCode ierr; 2572 Mat_MUMPS *mumps; 2573 PetscBool isSeqSBAIJ; 2574 2575 PetscFunctionBegin; 2576 if (ftype != MAT_FACTOR_CHOLESKY) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Cannot use PETSc SBAIJ matrices with MUMPS LU, use AIJ matrix"); 2577 if (A->rmap->bs > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Cannot use PETSc SBAIJ matrices with block size > 1 with MUMPS Cholesky, use AIJ matrix instead"); 2578 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr); 2579 /* Create the factorization matrix */ 2580 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2581 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2582 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2583 ierr = MatSetUp(B);CHKERRQ(ierr); 2584 2585 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2586 if (isSeqSBAIJ) { 2587 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 2588 } else { 2589 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 2590 } 2591 2592 B->ops->getinfo = MatGetInfo_External; 2593 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 2594 B->ops->view = MatView_MUMPS; 2595 2596 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2597 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2598 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2599 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2600 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2601 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2602 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2603 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2604 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2605 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2606 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2607 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2608 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2609 2610 B->factortype = MAT_FACTOR_CHOLESKY; 2611 #if defined(PETSC_USE_COMPLEX) 2612 mumps->sym = 2; 2613 #else 2614 if (A->spd_set && A->spd) mumps->sym = 1; 2615 else mumps->sym = 2; 2616 #endif 2617 2618 /* set solvertype */ 2619 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2620 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2621 2622 B->ops->destroy = MatDestroy_MUMPS; 2623 B->data = (void*)mumps; 2624 2625 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2626 2627 *F = B; 2628 PetscFunctionReturn(0); 2629 } 2630 2631 static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F) 2632 { 2633 Mat B; 2634 PetscErrorCode ierr; 2635 Mat_MUMPS *mumps; 2636 PetscBool isSeqBAIJ; 2637 2638 PetscFunctionBegin; 2639 /* Create the factorization matrix */ 2640 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr); 2641 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2642 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2643 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2644 ierr = MatSetUp(B);CHKERRQ(ierr); 2645 2646 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2647 if (ftype == MAT_FACTOR_LU) { 2648 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 2649 B->factortype = MAT_FACTOR_LU; 2650 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 2651 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 2652 mumps->sym = 0; 2653 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead\n"); 2654 2655 B->ops->getinfo = MatGetInfo_External; 2656 B->ops->view = MatView_MUMPS; 2657 2658 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2659 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2660 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2661 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2662 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2663 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2664 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2665 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2666 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2667 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2668 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2669 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2670 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2671 2672 /* set solvertype */ 2673 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2674 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2675 2676 B->ops->destroy = MatDestroy_MUMPS; 2677 B->data = (void*)mumps; 2678 2679 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2680 2681 *F = B; 2682 PetscFunctionReturn(0); 2683 } 2684 2685 /* MatGetFactor for Seq and MPI SELL matrices */ 2686 static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F) 2687 { 2688 Mat B; 2689 PetscErrorCode ierr; 2690 Mat_MUMPS *mumps; 2691 PetscBool isSeqSELL; 2692 2693 PetscFunctionBegin; 2694 /* Create the factorization matrix */ 2695 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);CHKERRQ(ierr); 2696 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2697 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2698 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2699 ierr = MatSetUp(B);CHKERRQ(ierr); 2700 2701 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2702 2703 B->ops->view = MatView_MUMPS; 2704 B->ops->getinfo = MatGetInfo_MUMPS; 2705 2706 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2707 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2708 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2709 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2710 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2711 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2712 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2713 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2714 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2715 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2716 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2717 2718 if (ftype == MAT_FACTOR_LU) { 2719 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 2720 B->factortype = MAT_FACTOR_LU; 2721 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 2722 else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 2723 mumps->sym = 0; 2724 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 2725 2726 /* set solvertype */ 2727 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2728 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2729 2730 B->ops->destroy = MatDestroy_MUMPS; 2731 B->data = (void*)mumps; 2732 2733 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2734 2735 *F = B; 2736 PetscFunctionReturn(0); 2737 } 2738 2739 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 2740 { 2741 PetscErrorCode ierr; 2742 2743 PetscFunctionBegin; 2744 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2745 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2746 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2747 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2748 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 2749 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2750 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2751 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2752 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2753 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 2754 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);CHKERRQ(ierr); 2755 PetscFunctionReturn(0); 2756 } 2757 2758