1 2 /* 3 Provides an interface to the MUMPS sparse solver 4 */ 5 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 8 #include <../src/mat/impls/sell/mpi/mpisell.h> 9 10 EXTERN_C_BEGIN 11 #if defined(PETSC_USE_COMPLEX) 12 #if defined(PETSC_USE_REAL_SINGLE) 13 #include <cmumps_c.h> 14 #else 15 #include <zmumps_c.h> 16 #endif 17 #else 18 #if defined(PETSC_USE_REAL_SINGLE) 19 #include <smumps_c.h> 20 #else 21 #include <dmumps_c.h> 22 #endif 23 #endif 24 EXTERN_C_END 25 #define JOB_INIT -1 26 #define JOB_FACTSYMBOLIC 1 27 #define JOB_FACTNUMERIC 2 28 #define JOB_SOLVE 3 29 #define JOB_END -2 30 31 /* calls to MUMPS */ 32 #if defined(PETSC_USE_COMPLEX) 33 #if defined(PETSC_USE_REAL_SINGLE) 34 #define MUMPS_c cmumps_c 35 #else 36 #define MUMPS_c zmumps_c 37 #endif 38 #else 39 #if defined(PETSC_USE_REAL_SINGLE) 40 #define MUMPS_c smumps_c 41 #else 42 #define MUMPS_c dmumps_c 43 #endif 44 #endif 45 46 #if defined(PETSC_HAVE_OPENMP) && defined(PETSC_HAVE_PTHREAD) && defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY) && defined(PETSC_HAVE_HWLOC) 47 #define PETSC_HAVE_OPENMP_SUPPORT 1 48 #endif 49 50 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */ 51 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 52 #define PetscMUMPS_c(mumps) \ 53 do { \ 54 if (mumps->use_petsc_omp_support) { \ 55 if (mumps->is_omp_master) { \ 56 ierr = PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl);CHKERRQ(ierr); \ 57 MUMPS_c(&mumps->id); \ 58 ierr = PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl);CHKERRQ(ierr); \ 59 } \ 60 ierr = PetscOmpCtrlBarrier(mumps->omp_ctrl);CHKERRQ(ierr); \ 61 } else { \ 62 MUMPS_c(&mumps->id); \ 63 } \ 64 } while(0) 65 #else 66 #define PetscMUMPS_c(mumps) \ 67 do { MUMPS_c(&mumps->id); } while (0) 68 #endif 69 70 /* declare MumpsScalar */ 71 #if defined(PETSC_USE_COMPLEX) 72 #if defined(PETSC_USE_REAL_SINGLE) 73 #define MumpsScalar mumps_complex 74 #else 75 #define MumpsScalar mumps_double_complex 76 #endif 77 #else 78 #define MumpsScalar PetscScalar 79 #endif 80 81 /* macros s.t. indices match MUMPS documentation */ 82 #define ICNTL(I) icntl[(I)-1] 83 #define CNTL(I) cntl[(I)-1] 84 #define INFOG(I) infog[(I)-1] 85 #define INFO(I) info[(I)-1] 86 #define RINFOG(I) rinfog[(I)-1] 87 #define RINFO(I) rinfo[(I)-1] 88 89 typedef struct { 90 #if defined(PETSC_USE_COMPLEX) 91 #if defined(PETSC_USE_REAL_SINGLE) 92 CMUMPS_STRUC_C id; 93 #else 94 ZMUMPS_STRUC_C id; 95 #endif 96 #else 97 #if defined(PETSC_USE_REAL_SINGLE) 98 SMUMPS_STRUC_C id; 99 #else 100 DMUMPS_STRUC_C id; 101 #endif 102 #endif 103 104 MatStructure matstruc; 105 PetscMPIInt myid,petsc_size; 106 PetscInt *irn,*jcn,nz,sym; 107 PetscScalar *val; 108 MPI_Comm mumps_comm; 109 PetscInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 110 VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 111 Vec b_seq,x_seq; 112 PetscInt ninfo,*info; /* display INFO */ 113 PetscInt sizeredrhs; 114 PetscScalar *schur_sol; 115 PetscInt schur_sizesol; 116 117 PetscBool use_petsc_omp_support; 118 PetscOmpCtrl omp_ctrl; /* an OpenMP controler that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 119 MPI_Comm petsc_comm,omp_comm; /* petsc_comm is petsc matrix's comm */ 120 PetscMPIInt mpinz; /* on master rank, nz = sum(mpinz) over omp_comm; on other ranks, mpinz = nz*/ 121 PetscMPIInt omp_comm_size; 122 PetscBool is_omp_master; /* is this rank the master of omp_comm */ 123 PetscMPIInt *recvcount,*displs; 124 125 PetscErrorCode (*ConvertToTriples)(Mat, int, MatReuse, int*, int**, int**, PetscScalar**); 126 } Mat_MUMPS; 127 128 extern PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*); 129 130 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps) 131 { 132 PetscErrorCode ierr; 133 134 PetscFunctionBegin; 135 ierr = PetscFree2(mumps->id.listvar_schur,mumps->id.schur);CHKERRQ(ierr); 136 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 137 ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr); 138 mumps->id.size_schur = 0; 139 mumps->id.schur_lld = 0; 140 mumps->id.ICNTL(19) = 0; 141 PetscFunctionReturn(0); 142 } 143 144 /* solve with rhs in mumps->id.redrhs and return in the same location */ 145 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 146 { 147 Mat_MUMPS *mumps=(Mat_MUMPS*)F->data; 148 Mat S,B,X; 149 MatFactorSchurStatus schurstatus; 150 PetscInt sizesol; 151 PetscErrorCode ierr; 152 153 PetscFunctionBegin; 154 ierr = MatFactorFactorizeSchurComplement(F);CHKERRQ(ierr); 155 ierr = MatFactorGetSchurComplement(F,&S,&schurstatus);CHKERRQ(ierr); 156 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);CHKERRQ(ierr); 157 switch (schurstatus) { 158 case MAT_FACTOR_SCHUR_FACTORED: 159 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);CHKERRQ(ierr); 160 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 161 ierr = MatMatSolveTranspose(S,B,X);CHKERRQ(ierr); 162 } else { 163 ierr = MatMatSolve(S,B,X);CHKERRQ(ierr); 164 } 165 break; 166 case MAT_FACTOR_SCHUR_INVERTED: 167 sizesol = mumps->id.nrhs*mumps->id.size_schur; 168 if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 169 ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr); 170 ierr = PetscMalloc1(sizesol,&mumps->schur_sol);CHKERRQ(ierr); 171 mumps->schur_sizesol = sizesol; 172 } 173 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);CHKERRQ(ierr); 174 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 175 ierr = MatTransposeMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr); 176 } else { 177 ierr = MatMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr); 178 } 179 ierr = MatCopy(X,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 180 break; 181 default: 182 SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %D",F->schur_status); 183 break; 184 } 185 ierr = MatFactorRestoreSchurComplement(F,&S,schurstatus);CHKERRQ(ierr); 186 ierr = MatDestroy(&B);CHKERRQ(ierr); 187 ierr = MatDestroy(&X);CHKERRQ(ierr); 188 PetscFunctionReturn(0); 189 } 190 191 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 192 { 193 Mat_MUMPS *mumps=(Mat_MUMPS*)F->data; 194 PetscErrorCode ierr; 195 196 PetscFunctionBegin; 197 if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 198 PetscFunctionReturn(0); 199 } 200 if (!expansion) { /* prepare for the condensation step */ 201 PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur; 202 /* allocate MUMPS internal array to store reduced right-hand sides */ 203 if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 204 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 205 mumps->id.lredrhs = mumps->id.size_schur; 206 ierr = PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);CHKERRQ(ierr); 207 mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs; 208 } 209 mumps->id.ICNTL(26) = 1; /* condensation phase */ 210 } else { /* prepare for the expansion step */ 211 /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */ 212 ierr = MatMumpsSolveSchur_Private(F);CHKERRQ(ierr); 213 mumps->id.ICNTL(26) = 2; /* expansion phase */ 214 PetscMUMPS_c(mumps); 215 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 216 /* restore defaults */ 217 mumps->id.ICNTL(26) = -1; 218 /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 219 if (mumps->id.nrhs > 1) { 220 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 221 mumps->id.lredrhs = 0; 222 mumps->sizeredrhs = 0; 223 } 224 } 225 PetscFunctionReturn(0); 226 } 227 228 /* 229 MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz] 230 231 input: 232 A - matrix in aij,baij or sbaij (bs=1) format 233 shift - 0: C style output triple; 1: Fortran style output triple. 234 reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 235 MAT_REUSE_MATRIX: only the values in v array are updated 236 output: 237 nnz - dim of r, c, and v (number of local nonzero entries of A) 238 r, c, v - row and col index, matrix values (matrix triples) 239 240 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 241 freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 242 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 243 244 */ 245 246 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 247 { 248 const PetscInt *ai,*aj,*ajj,M=A->rmap->n; 249 PetscInt nz,rnz,i,j; 250 PetscErrorCode ierr; 251 PetscInt *row,*col; 252 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 253 254 PetscFunctionBegin; 255 *v=aa->a; 256 if (reuse == MAT_INITIAL_MATRIX) { 257 nz = aa->nz; 258 ai = aa->i; 259 aj = aa->j; 260 *nnz = nz; 261 ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr); 262 col = row + nz; 263 264 nz = 0; 265 for (i=0; i<M; i++) { 266 rnz = ai[i+1] - ai[i]; 267 ajj = aj + ai[i]; 268 for (j=0; j<rnz; j++) { 269 row[nz] = i+shift; col[nz++] = ajj[j] + shift; 270 } 271 } 272 *r = row; *c = col; 273 } 274 PetscFunctionReturn(0); 275 } 276 277 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 278 { 279 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 280 PetscInt *ptr; 281 282 PetscFunctionBegin; 283 *v = a->val; 284 if (reuse == MAT_INITIAL_MATRIX) { 285 PetscInt nz,i,j,row; 286 PetscErrorCode ierr; 287 288 nz = a->sliidx[a->totalslices]; 289 *nnz = nz; 290 ierr = PetscMalloc1(2*nz, &ptr);CHKERRQ(ierr); 291 *r = ptr; 292 *c = ptr + nz; 293 294 for (i=0; i<a->totalslices; i++) { 295 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 296 *ptr++ = 8*i + row + shift; 297 } 298 } 299 for (i=0;i<nz;i++) *ptr++ = a->colidx[i] + shift; 300 } 301 PetscFunctionReturn(0); 302 } 303 304 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 305 { 306 Mat_SeqBAIJ *aa=(Mat_SeqBAIJ*)A->data; 307 const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2; 308 PetscInt bs,M,nz,idx=0,rnz,i,j,k,m; 309 PetscErrorCode ierr; 310 PetscInt *row,*col; 311 312 PetscFunctionBegin; 313 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 314 M = A->rmap->N/bs; 315 *v = aa->a; 316 if (reuse == MAT_INITIAL_MATRIX) { 317 ai = aa->i; aj = aa->j; 318 nz = bs2*aa->nz; 319 *nnz = nz; 320 ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr); 321 col = row + nz; 322 323 for (i=0; i<M; i++) { 324 ajj = aj + ai[i]; 325 rnz = ai[i+1] - ai[i]; 326 for (k=0; k<rnz; k++) { 327 for (j=0; j<bs; j++) { 328 for (m=0; m<bs; m++) { 329 row[idx] = i*bs + m + shift; 330 col[idx++] = bs*(ajj[k]) + j + shift; 331 } 332 } 333 } 334 } 335 *r = row; *c = col; 336 } 337 PetscFunctionReturn(0); 338 } 339 340 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 341 { 342 const PetscInt *ai, *aj,*ajj,M=A->rmap->n; 343 PetscInt nz,rnz,i,j; 344 PetscErrorCode ierr; 345 PetscInt *row,*col; 346 Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)A->data; 347 348 PetscFunctionBegin; 349 *v = aa->a; 350 if (reuse == MAT_INITIAL_MATRIX) { 351 nz = aa->nz; 352 ai = aa->i; 353 aj = aa->j; 354 *v = aa->a; 355 *nnz = nz; 356 ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr); 357 col = row + nz; 358 359 nz = 0; 360 for (i=0; i<M; i++) { 361 rnz = ai[i+1] - ai[i]; 362 ajj = aj + ai[i]; 363 for (j=0; j<rnz; j++) { 364 row[nz] = i+shift; col[nz++] = ajj[j] + shift; 365 } 366 } 367 *r = row; *c = col; 368 } 369 PetscFunctionReturn(0); 370 } 371 372 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 373 { 374 const PetscInt *ai,*aj,*ajj,*adiag,M=A->rmap->n; 375 PetscInt nz,rnz,i,j; 376 const PetscScalar *av,*v1; 377 PetscScalar *val; 378 PetscErrorCode ierr; 379 PetscInt *row,*col; 380 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 381 PetscBool missing; 382 383 PetscFunctionBegin; 384 ai = aa->i; aj = aa->j; av = aa->a; 385 adiag = aa->diag; 386 ierr = MatMissingDiagonal_SeqAIJ(A,&missing,&i);CHKERRQ(ierr); 387 if (reuse == MAT_INITIAL_MATRIX) { 388 /* count nz in the upper triangular part of A */ 389 nz = 0; 390 if (missing) { 391 for (i=0; i<M; i++) { 392 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 393 for (j=ai[i];j<ai[i+1];j++) { 394 if (aj[j] < i) continue; 395 nz++; 396 } 397 } else { 398 nz += ai[i+1] - adiag[i]; 399 } 400 } 401 } else { 402 for (i=0; i<M; i++) nz += ai[i+1] - adiag[i]; 403 } 404 *nnz = nz; 405 406 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 407 col = row + nz; 408 val = (PetscScalar*)(col + nz); 409 410 nz = 0; 411 if (missing) { 412 for (i=0; i<M; i++) { 413 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 414 for (j=ai[i];j<ai[i+1];j++) { 415 if (aj[j] < i) continue; 416 row[nz] = i+shift; 417 col[nz] = aj[j]+shift; 418 val[nz] = av[j]; 419 nz++; 420 } 421 } else { 422 rnz = ai[i+1] - adiag[i]; 423 ajj = aj + adiag[i]; 424 v1 = av + adiag[i]; 425 for (j=0; j<rnz; j++) { 426 row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j]; 427 } 428 } 429 } 430 } else { 431 for (i=0; i<M; i++) { 432 rnz = ai[i+1] - adiag[i]; 433 ajj = aj + adiag[i]; 434 v1 = av + adiag[i]; 435 for (j=0; j<rnz; j++) { 436 row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j]; 437 } 438 } 439 } 440 *r = row; *c = col; *v = val; 441 } else { 442 nz = 0; val = *v; 443 if (missing) { 444 for (i=0; i <M; i++) { 445 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 446 for (j=ai[i];j<ai[i+1];j++) { 447 if (aj[j] < i) continue; 448 val[nz++] = av[j]; 449 } 450 } else { 451 rnz = ai[i+1] - adiag[i]; 452 v1 = av + adiag[i]; 453 for (j=0; j<rnz; j++) { 454 val[nz++] = v1[j]; 455 } 456 } 457 } 458 } else { 459 for (i=0; i <M; i++) { 460 rnz = ai[i+1] - adiag[i]; 461 v1 = av + adiag[i]; 462 for (j=0; j<rnz; j++) { 463 val[nz++] = v1[j]; 464 } 465 } 466 } 467 } 468 PetscFunctionReturn(0); 469 } 470 471 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 472 { 473 const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 474 PetscErrorCode ierr; 475 PetscInt rstart,nz,i,j,jj,irow,countA,countB; 476 PetscInt *row,*col; 477 const PetscScalar *av, *bv,*v1,*v2; 478 PetscScalar *val; 479 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data; 480 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)(mat->A)->data; 481 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 482 483 PetscFunctionBegin; 484 ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap->rstart; 485 av=aa->a; bv=bb->a; 486 487 garray = mat->garray; 488 489 if (reuse == MAT_INITIAL_MATRIX) { 490 nz = aa->nz + bb->nz; 491 *nnz = nz; 492 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 493 col = row + nz; 494 val = (PetscScalar*)(col + nz); 495 496 *r = row; *c = col; *v = val; 497 } else { 498 row = *r; col = *c; val = *v; 499 } 500 501 jj = 0; irow = rstart; 502 for (i=0; i<m; i++) { 503 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 504 countA = ai[i+1] - ai[i]; 505 countB = bi[i+1] - bi[i]; 506 bjj = bj + bi[i]; 507 v1 = av + ai[i]; 508 v2 = bv + bi[i]; 509 510 /* A-part */ 511 for (j=0; j<countA; j++) { 512 if (reuse == MAT_INITIAL_MATRIX) { 513 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 514 } 515 val[jj++] = v1[j]; 516 } 517 518 /* B-part */ 519 for (j=0; j < countB; j++) { 520 if (reuse == MAT_INITIAL_MATRIX) { 521 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 522 } 523 val[jj++] = v2[j]; 524 } 525 irow++; 526 } 527 PetscFunctionReturn(0); 528 } 529 530 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 531 { 532 const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 533 PetscErrorCode ierr; 534 PetscInt rstart,nz,i,j,jj,irow,countA,countB; 535 PetscInt *row,*col; 536 const PetscScalar *av, *bv,*v1,*v2; 537 PetscScalar *val; 538 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 539 Mat_SeqAIJ *aa = (Mat_SeqAIJ*)(mat->A)->data; 540 Mat_SeqAIJ *bb = (Mat_SeqAIJ*)(mat->B)->data; 541 542 PetscFunctionBegin; 543 ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap->rstart; 544 av=aa->a; bv=bb->a; 545 546 garray = mat->garray; 547 548 if (reuse == MAT_INITIAL_MATRIX) { 549 nz = aa->nz + bb->nz; 550 *nnz = nz; 551 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 552 col = row + nz; 553 val = (PetscScalar*)(col + nz); 554 555 *r = row; *c = col; *v = val; 556 } else { 557 row = *r; col = *c; val = *v; 558 } 559 560 jj = 0; irow = rstart; 561 for (i=0; i<m; i++) { 562 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 563 countA = ai[i+1] - ai[i]; 564 countB = bi[i+1] - bi[i]; 565 bjj = bj + bi[i]; 566 v1 = av + ai[i]; 567 v2 = bv + bi[i]; 568 569 /* A-part */ 570 for (j=0; j<countA; j++) { 571 if (reuse == MAT_INITIAL_MATRIX) { 572 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 573 } 574 val[jj++] = v1[j]; 575 } 576 577 /* B-part */ 578 for (j=0; j < countB; j++) { 579 if (reuse == MAT_INITIAL_MATRIX) { 580 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 581 } 582 val[jj++] = v2[j]; 583 } 584 irow++; 585 } 586 PetscFunctionReturn(0); 587 } 588 589 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 590 { 591 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)A->data; 592 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ*)(mat->A)->data; 593 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 594 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj; 595 const PetscInt *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart; 596 const PetscInt bs2=mat->bs2; 597 PetscErrorCode ierr; 598 PetscInt bs,nz,i,j,k,n,jj,irow,countA,countB,idx; 599 PetscInt *row,*col; 600 const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2; 601 PetscScalar *val; 602 603 PetscFunctionBegin; 604 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 605 if (reuse == MAT_INITIAL_MATRIX) { 606 nz = bs2*(aa->nz + bb->nz); 607 *nnz = nz; 608 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 609 col = row + nz; 610 val = (PetscScalar*)(col + nz); 611 612 *r = row; *c = col; *v = val; 613 } else { 614 row = *r; col = *c; val = *v; 615 } 616 617 jj = 0; irow = rstart; 618 for (i=0; i<mbs; i++) { 619 countA = ai[i+1] - ai[i]; 620 countB = bi[i+1] - bi[i]; 621 ajj = aj + ai[i]; 622 bjj = bj + bi[i]; 623 v1 = av + bs2*ai[i]; 624 v2 = bv + bs2*bi[i]; 625 626 idx = 0; 627 /* A-part */ 628 for (k=0; k<countA; k++) { 629 for (j=0; j<bs; j++) { 630 for (n=0; n<bs; n++) { 631 if (reuse == MAT_INITIAL_MATRIX) { 632 row[jj] = irow + n + shift; 633 col[jj] = rstart + bs*ajj[k] + j + shift; 634 } 635 val[jj++] = v1[idx++]; 636 } 637 } 638 } 639 640 idx = 0; 641 /* B-part */ 642 for (k=0; k<countB; k++) { 643 for (j=0; j<bs; j++) { 644 for (n=0; n<bs; n++) { 645 if (reuse == MAT_INITIAL_MATRIX) { 646 row[jj] = irow + n + shift; 647 col[jj] = bs*garray[bjj[k]] + j + shift; 648 } 649 val[jj++] = v2[idx++]; 650 } 651 } 652 } 653 irow += bs; 654 } 655 PetscFunctionReturn(0); 656 } 657 658 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 659 { 660 const PetscInt *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 661 PetscErrorCode ierr; 662 PetscInt rstart,nz,nza,nzb,i,j,jj,irow,countA,countB; 663 PetscInt *row,*col; 664 const PetscScalar *av, *bv,*v1,*v2; 665 PetscScalar *val; 666 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 667 Mat_SeqAIJ *aa =(Mat_SeqAIJ*)(mat->A)->data; 668 Mat_SeqAIJ *bb =(Mat_SeqAIJ*)(mat->B)->data; 669 670 PetscFunctionBegin; 671 ai=aa->i; aj=aa->j; adiag=aa->diag; 672 bi=bb->i; bj=bb->j; garray = mat->garray; 673 av=aa->a; bv=bb->a; 674 675 rstart = A->rmap->rstart; 676 677 if (reuse == MAT_INITIAL_MATRIX) { 678 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 679 nzb = 0; /* num of upper triangular entries in mat->B */ 680 for (i=0; i<m; i++) { 681 nza += (ai[i+1] - adiag[i]); 682 countB = bi[i+1] - bi[i]; 683 bjj = bj + bi[i]; 684 for (j=0; j<countB; j++) { 685 if (garray[bjj[j]] > rstart) nzb++; 686 } 687 } 688 689 nz = nza + nzb; /* total nz of upper triangular part of mat */ 690 *nnz = nz; 691 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 692 col = row + nz; 693 val = (PetscScalar*)(col + nz); 694 695 *r = row; *c = col; *v = val; 696 } else { 697 row = *r; col = *c; val = *v; 698 } 699 700 jj = 0; irow = rstart; 701 for (i=0; i<m; i++) { 702 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 703 v1 = av + adiag[i]; 704 countA = ai[i+1] - adiag[i]; 705 countB = bi[i+1] - bi[i]; 706 bjj = bj + bi[i]; 707 v2 = bv + bi[i]; 708 709 /* A-part */ 710 for (j=0; j<countA; j++) { 711 if (reuse == MAT_INITIAL_MATRIX) { 712 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 713 } 714 val[jj++] = v1[j]; 715 } 716 717 /* B-part */ 718 for (j=0; j < countB; j++) { 719 if (garray[bjj[j]] > rstart) { 720 if (reuse == MAT_INITIAL_MATRIX) { 721 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 722 } 723 val[jj++] = v2[j]; 724 } 725 } 726 irow++; 727 } 728 PetscFunctionReturn(0); 729 } 730 731 PetscErrorCode MatDestroy_MUMPS(Mat A) 732 { 733 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 734 PetscErrorCode ierr; 735 736 PetscFunctionBegin; 737 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 738 ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr); 739 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 740 ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr); 741 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 742 ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr); 743 ierr = PetscFree(mumps->irn);CHKERRQ(ierr); 744 ierr = PetscFree(mumps->info);CHKERRQ(ierr); 745 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 746 mumps->id.job = JOB_END; 747 PetscMUMPS_c(mumps); 748 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 749 if (mumps->use_petsc_omp_support) { ierr = PetscOmpCtrlDestroy(&mumps->omp_ctrl);CHKERRQ(ierr); } 750 #endif 751 ierr = PetscFree2(mumps->recvcount,mumps->displs);CHKERRQ(ierr); 752 ierr = PetscFree(A->data);CHKERRQ(ierr); 753 754 /* clear composed functions */ 755 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr); 756 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr); 757 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr); 758 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr); 759 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);CHKERRQ(ierr); 760 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr); 761 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);CHKERRQ(ierr); 762 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);CHKERRQ(ierr); 763 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);CHKERRQ(ierr); 764 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);CHKERRQ(ierr); 765 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);CHKERRQ(ierr); 766 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);CHKERRQ(ierr); 767 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);CHKERRQ(ierr); 768 PetscFunctionReturn(0); 769 } 770 771 PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x) 772 { 773 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 774 PetscScalar *array; 775 Vec b_seq; 776 IS is_iden,is_petsc; 777 PetscErrorCode ierr; 778 PetscInt i; 779 PetscBool second_solve = PETSC_FALSE; 780 static PetscBool cite1 = PETSC_FALSE,cite2 = PETSC_FALSE; 781 782 PetscFunctionBegin; 783 ierr = PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1);CHKERRQ(ierr); 784 ierr = PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",&cite2);CHKERRQ(ierr); 785 786 if (A->factorerrortype) { 787 ierr = PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 788 ierr = VecSetInf(x);CHKERRQ(ierr); 789 PetscFunctionReturn(0); 790 } 791 792 mumps->id.ICNTL(20)= 0; /* dense RHS */ 793 mumps->id.nrhs = 1; 794 b_seq = mumps->b_seq; 795 if (mumps->petsc_size > 1) { 796 /* MUMPS only supports centralized rhs. Scatter b into a seqential rhs vector */ 797 ierr = VecScatterBegin(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 798 ierr = VecScatterEnd(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 799 if (!mumps->myid) {ierr = VecGetArray(b_seq,&array);CHKERRQ(ierr);} 800 } else { /* petsc_size == 1 */ 801 ierr = VecCopy(b,x);CHKERRQ(ierr); 802 ierr = VecGetArray(x,&array);CHKERRQ(ierr); 803 } 804 if (!mumps->myid) { /* define rhs on the host */ 805 mumps->id.nrhs = 1; 806 mumps->id.rhs = (MumpsScalar*)array; 807 } 808 809 /* 810 handle condensation step of Schur complement (if any) 811 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 812 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 813 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 814 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 815 */ 816 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 817 if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n"); 818 second_solve = PETSC_TRUE; 819 ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr); 820 } 821 /* solve phase */ 822 /*-------------*/ 823 mumps->id.job = JOB_SOLVE; 824 PetscMUMPS_c(mumps); 825 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 826 827 /* handle expansion step of Schur complement (if any) */ 828 if (second_solve) { 829 ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr); 830 } 831 832 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 833 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 834 /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 835 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 836 } 837 if (!mumps->scat_sol) { /* create scatter scat_sol */ 838 ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */ 839 for (i=0; i<mumps->id.lsol_loc; i++) { 840 mumps->id.isol_loc[i] -= 1; /* change Fortran style to C style */ 841 } 842 ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,mumps->id.isol_loc,PETSC_COPY_VALUES,&is_petsc);CHKERRQ(ierr); /* to */ 843 ierr = VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr); 844 ierr = ISDestroy(&is_iden);CHKERRQ(ierr); 845 ierr = ISDestroy(&is_petsc);CHKERRQ(ierr); 846 847 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 848 } 849 850 ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 851 ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 852 } 853 ierr = PetscLogFlops(2.0*mumps->id.RINFO(3));CHKERRQ(ierr); 854 PetscFunctionReturn(0); 855 } 856 857 PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x) 858 { 859 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 860 PetscErrorCode ierr; 861 862 PetscFunctionBegin; 863 mumps->id.ICNTL(9) = 0; 864 ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr); 865 mumps->id.ICNTL(9) = 1; 866 PetscFunctionReturn(0); 867 } 868 869 PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X) 870 { 871 PetscErrorCode ierr; 872 Mat Bt = NULL; 873 PetscBool flg, flgT; 874 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 875 PetscInt i,nrhs,M; 876 PetscScalar *array,*bray; 877 PetscInt lsol_loc,nlsol_loc,*isol_loc,*idx,*iidx,*idxx,*isol_loc_save; 878 MumpsScalar *sol_loc,*sol_loc_save; 879 IS is_to,is_from; 880 PetscInt k,proc,j,m; 881 const PetscInt *rstart; 882 Vec v_mpi,b_seq,x_seq; 883 VecScatter scat_rhs,scat_sol; 884 PetscScalar *aa; 885 PetscInt spnr,*ia,*ja; 886 Mat_MPIAIJ *b = NULL; 887 888 PetscFunctionBegin; 889 ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 890 if (!flg) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix"); 891 892 ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 893 if (flg) { /* dense B */ 894 if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution"); 895 mumps->id.ICNTL(20)= 0; /* dense RHS */ 896 } else { /* sparse B */ 897 ierr = PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);CHKERRQ(ierr); 898 if (flgT) { /* input B is transpose of actural RHS matrix, 899 because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */ 900 ierr = MatTransposeGetMat(B,&Bt);CHKERRQ(ierr); 901 } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix"); 902 mumps->id.ICNTL(20)= 1; /* sparse RHS */ 903 } 904 905 ierr = MatGetSize(B,&M,&nrhs);CHKERRQ(ierr); 906 mumps->id.nrhs = nrhs; 907 mumps->id.lrhs = M; 908 mumps->id.rhs = NULL; 909 910 if (mumps->petsc_size == 1) { 911 PetscScalar *aa; 912 PetscInt spnr,*ia,*ja; 913 PetscBool second_solve = PETSC_FALSE; 914 915 ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr); 916 mumps->id.rhs = (MumpsScalar*)array; 917 918 if (!Bt) { /* dense B */ 919 /* copy B to X */ 920 ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr); 921 ierr = PetscMemcpy(array,bray,M*nrhs*sizeof(PetscScalar));CHKERRQ(ierr); 922 ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr); 923 } else { /* sparse B */ 924 ierr = MatSeqAIJGetArray(Bt,&aa);CHKERRQ(ierr); 925 ierr = MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 926 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 927 /* mumps requires ia and ja start at 1! */ 928 mumps->id.irhs_ptr = ia; 929 mumps->id.irhs_sparse = ja; 930 mumps->id.nz_rhs = ia[spnr] - 1; 931 mumps->id.rhs_sparse = (MumpsScalar*)aa; 932 } 933 /* handle condensation step of Schur complement (if any) */ 934 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 935 second_solve = PETSC_TRUE; 936 ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr); 937 } 938 /* solve phase */ 939 /*-------------*/ 940 mumps->id.job = JOB_SOLVE; 941 PetscMUMPS_c(mumps); 942 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 943 944 /* handle expansion step of Schur complement (if any) */ 945 if (second_solve) { 946 ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr); 947 } 948 if (Bt) { /* sparse B */ 949 ierr = MatSeqAIJRestoreArray(Bt,&aa);CHKERRQ(ierr); 950 ierr = MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 951 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 952 } 953 ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr); 954 PetscFunctionReturn(0); 955 } 956 957 /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/ 958 if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n"); 959 960 /* create x_seq to hold mumps local solution */ 961 isol_loc_save = mumps->id.isol_loc; /* save it for MatSovle() */ 962 sol_loc_save = mumps->id.sol_loc; 963 964 lsol_loc = mumps->id.lsol_loc; 965 nlsol_loc = nrhs*lsol_loc; /* length of sol_loc */ 966 ierr = PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);CHKERRQ(ierr); 967 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 968 mumps->id.isol_loc = isol_loc; 969 970 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&x_seq);CHKERRQ(ierr); 971 972 /* scatter v_mpi to b_seq because MUMPS only supports centralized rhs */ 973 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B; 974 iidx: inverse of idx, will be used by scattering mumps x_seq -> petsc X */ 975 ierr = PetscMalloc1(nrhs*M,&idx);CHKERRQ(ierr); 976 977 if (!Bt) { /* dense B */ 978 /* wrap dense rhs matrix B into a vector v_mpi */ 979 ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr); 980 ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr); 981 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr); 982 ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr); 983 984 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */ 985 if (!mumps->myid) { 986 ierr = MatGetOwnershipRanges(B,&rstart);CHKERRQ(ierr); 987 k = 0; 988 for (proc=0; proc<mumps->petsc_size; proc++){ 989 for (j=0; j<nrhs; j++){ 990 for (i=rstart[proc]; i<rstart[proc+1]; i++){ 991 idx[k++] = j*M + i; 992 } 993 } 994 } 995 996 ierr = VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);CHKERRQ(ierr); 997 ierr = ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr); 998 ierr = ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);CHKERRQ(ierr); 999 } else { 1000 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);CHKERRQ(ierr); 1001 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);CHKERRQ(ierr); 1002 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);CHKERRQ(ierr); 1003 } 1004 ierr = VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);CHKERRQ(ierr); 1005 ierr = VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1006 ierr = ISDestroy(&is_to);CHKERRQ(ierr); 1007 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1008 ierr = VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1009 1010 if (!mumps->myid) { /* define rhs on the host */ 1011 ierr = VecGetArray(b_seq,&bray);CHKERRQ(ierr); 1012 mumps->id.rhs = (MumpsScalar*)bray; 1013 ierr = VecRestoreArray(b_seq,&bray);CHKERRQ(ierr); 1014 } 1015 1016 } else { /* sparse B */ 1017 b = (Mat_MPIAIJ*)Bt->data; 1018 1019 /* wrap dense X into a vector v_mpi */ 1020 ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr); 1021 ierr = MatDenseGetArray(X,&bray);CHKERRQ(ierr); 1022 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr); 1023 ierr = MatDenseRestoreArray(X,&bray);CHKERRQ(ierr); 1024 1025 if (!mumps->myid) { 1026 ierr = MatSeqAIJGetArray(b->A,&aa);CHKERRQ(ierr); 1027 ierr = MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1028 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 1029 /* mumps requires ia and ja start at 1! */ 1030 mumps->id.irhs_ptr = ia; 1031 mumps->id.irhs_sparse = ja; 1032 mumps->id.nz_rhs = ia[spnr] - 1; 1033 mumps->id.rhs_sparse = (MumpsScalar*)aa; 1034 } else { 1035 mumps->id.irhs_ptr = NULL; 1036 mumps->id.irhs_sparse = NULL; 1037 mumps->id.nz_rhs = 0; 1038 mumps->id.rhs_sparse = NULL; 1039 } 1040 } 1041 1042 /* solve phase */ 1043 /*-------------*/ 1044 mumps->id.job = JOB_SOLVE; 1045 PetscMUMPS_c(mumps); 1046 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1047 1048 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1049 ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr); 1050 ierr = VecPlaceArray(v_mpi,array);CHKERRQ(ierr); 1051 1052 /* create scatter scat_sol */ 1053 ierr = MatGetOwnershipRanges(X,&rstart);CHKERRQ(ierr); 1054 /* iidx: inverse of idx computed above, used for scattering mumps x_seq to petsc X */ 1055 iidx = idx; 1056 k = 0; 1057 for (proc=0; proc<mumps->petsc_size; proc++){ 1058 for (j=0; j<nrhs; j++){ 1059 for (i=rstart[proc]; i<rstart[proc+1]; i++) iidx[j*M + i] = k++; 1060 } 1061 } 1062 1063 ierr = PetscMalloc1(nlsol_loc,&idxx);CHKERRQ(ierr); 1064 ierr = ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);CHKERRQ(ierr); 1065 for (i=0; i<lsol_loc; i++) { 1066 isol_loc[i] -= 1; /* change Fortran style to C style */ 1067 idxx[i] = iidx[isol_loc[i]]; 1068 for (j=1; j<nrhs; j++){ 1069 idxx[j*lsol_loc+i] = iidx[isol_loc[i]+j*M]; 1070 } 1071 } 1072 ierr = ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr); 1073 ierr = VecScatterCreate(x_seq,is_from,v_mpi,is_to,&scat_sol);CHKERRQ(ierr); 1074 ierr = VecScatterBegin(scat_sol,x_seq,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1075 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1076 ierr = ISDestroy(&is_to);CHKERRQ(ierr); 1077 ierr = VecScatterEnd(scat_sol,x_seq,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1078 ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr); 1079 1080 /* free spaces */ 1081 mumps->id.sol_loc = sol_loc_save; 1082 mumps->id.isol_loc = isol_loc_save; 1083 1084 ierr = PetscFree2(sol_loc,isol_loc);CHKERRQ(ierr); 1085 ierr = PetscFree(idx);CHKERRQ(ierr); 1086 ierr = PetscFree(idxx);CHKERRQ(ierr); 1087 ierr = VecDestroy(&x_seq);CHKERRQ(ierr); 1088 ierr = VecDestroy(&v_mpi);CHKERRQ(ierr); 1089 if (Bt) { 1090 if (!mumps->myid) { 1091 b = (Mat_MPIAIJ*)Bt->data; 1092 ierr = MatSeqAIJRestoreArray(b->A,&aa);CHKERRQ(ierr); 1093 ierr = MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1094 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 1095 } 1096 } else { 1097 ierr = VecDestroy(&b_seq);CHKERRQ(ierr); 1098 ierr = VecScatterDestroy(&scat_rhs);CHKERRQ(ierr); 1099 } 1100 ierr = VecScatterDestroy(&scat_sol);CHKERRQ(ierr); 1101 ierr = PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));CHKERRQ(ierr); 1102 PetscFunctionReturn(0); 1103 } 1104 1105 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X) 1106 { 1107 PetscErrorCode ierr; 1108 PetscBool flg; 1109 Mat B; 1110 1111 PetscFunctionBegin; 1112 ierr = PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 1113 if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix"); 1114 1115 /* Create B=Bt^T that uses Bt's data structure */ 1116 ierr = MatCreateTranspose(Bt,&B);CHKERRQ(ierr); 1117 1118 ierr = MatMatSolve_MUMPS(A,B,X);CHKERRQ(ierr); 1119 ierr = MatDestroy(&B);CHKERRQ(ierr); 1120 PetscFunctionReturn(0); 1121 } 1122 1123 #if !defined(PETSC_USE_COMPLEX) 1124 /* 1125 input: 1126 F: numeric factor 1127 output: 1128 nneg: total number of negative pivots 1129 nzero: total number of zero pivots 1130 npos: (global dimension of F) - nneg - nzero 1131 */ 1132 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos) 1133 { 1134 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1135 PetscErrorCode ierr; 1136 PetscMPIInt size; 1137 1138 PetscFunctionBegin; 1139 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRQ(ierr); 1140 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1141 if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",mumps->id.INFOG(13)); 1142 1143 if (nneg) *nneg = mumps->id.INFOG(12); 1144 if (nzero || npos) { 1145 if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1146 if (nzero) *nzero = mumps->id.INFOG(28); 1147 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1148 } 1149 PetscFunctionReturn(0); 1150 } 1151 #endif 1152 1153 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps) 1154 { 1155 PetscErrorCode ierr; 1156 PetscInt i,nz=0,*irn,*jcn=0; 1157 PetscScalar *val=0; 1158 PetscMPIInt mpinz,*recvcount=NULL,*displs=NULL; 1159 1160 PetscFunctionBegin; 1161 if (mumps->omp_comm_size > 1) { 1162 if (reuse == MAT_INITIAL_MATRIX) { 1163 /* master first gathers counts of nonzeros to receive */ 1164 if (mumps->is_omp_master) { ierr = PetscMalloc2(mumps->omp_comm_size,&recvcount,mumps->omp_comm_size,&displs);CHKERRQ(ierr); } 1165 ierr = PetscMPIIntCast(mumps->nz,&mpinz);CHKERRQ(ierr); 1166 ierr = MPI_Gather(&mpinz,1,MPI_INT,recvcount,1,MPI_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1167 1168 /* master allocates memory to receive nonzeros */ 1169 if (mumps->is_omp_master) { 1170 displs[0] = 0; 1171 for (i=1; i<mumps->omp_comm_size; i++) displs[i] = displs[i-1] + recvcount[i-1]; 1172 nz = displs[mumps->omp_comm_size-1] + recvcount[mumps->omp_comm_size-1]; 1173 ierr = PetscMalloc(2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar),&irn);CHKERRQ(ierr); 1174 jcn = irn + nz; 1175 val = (PetscScalar*)(jcn + nz); 1176 } 1177 1178 /* save the gatherv plan */ 1179 mumps->mpinz = mpinz; /* used as send count */ 1180 mumps->recvcount = recvcount; 1181 mumps->displs = displs; 1182 1183 /* master gathers nonzeros */ 1184 ierr = MPI_Gatherv(mumps->irn,mpinz,MPIU_INT,irn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1185 ierr = MPI_Gatherv(mumps->jcn,mpinz,MPIU_INT,jcn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1186 ierr = MPI_Gatherv(mumps->val,mpinz,MPIU_SCALAR,val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1187 1188 /* master frees its row/col/val and replaces them with bigger arrays */ 1189 if (mumps->is_omp_master) { 1190 ierr = PetscFree(mumps->irn);CHKERRQ(ierr); /* irn/jcn/val are allocated together so free only irn */ 1191 mumps->nz = nz; /* it is a sum of mpinz over omp_comm */ 1192 mumps->irn = irn; 1193 mumps->jcn = jcn; 1194 mumps->val = val; 1195 } 1196 } else { 1197 ierr = MPI_Gatherv((mumps->is_omp_master?MPI_IN_PLACE:mumps->val),mumps->mpinz,MPIU_SCALAR,mumps->val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1198 } 1199 } 1200 PetscFunctionReturn(0); 1201 } 1202 1203 PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info) 1204 { 1205 Mat_MUMPS *mumps =(Mat_MUMPS*)(F)->data; 1206 PetscErrorCode ierr; 1207 PetscBool isMPIAIJ; 1208 1209 PetscFunctionBegin; 1210 if (mumps->id.INFOG(1) < 0) { 1211 if (mumps->id.INFOG(1) == -6) { 1212 ierr = PetscInfo2(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1213 } 1214 ierr = PetscInfo2(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1215 PetscFunctionReturn(0); 1216 } 1217 1218 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1219 ierr = MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);CHKERRQ(ierr); 1220 1221 /* numerical factorization phase */ 1222 /*-------------------------------*/ 1223 mumps->id.job = JOB_FACTNUMERIC; 1224 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1225 if (!mumps->myid) { 1226 mumps->id.a = (MumpsScalar*)mumps->val; 1227 } 1228 } else { 1229 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1230 } 1231 PetscMUMPS_c(mumps); 1232 if (mumps->id.INFOG(1) < 0) { 1233 if (A->erroriffailure) { 1234 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 1235 } else { 1236 if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */ 1237 ierr = PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1238 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1239 } else if (mumps->id.INFOG(1) == -13) { 1240 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1241 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1242 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10) ) { 1243 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray \n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1244 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1245 } else { 1246 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1247 F->factorerrortype = MAT_FACTOR_OTHER; 1248 } 1249 } 1250 } 1251 if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d\n",mumps->id.INFOG(16)); 1252 1253 F->assembled = PETSC_TRUE; 1254 mumps->matstruc = SAME_NONZERO_PATTERN; 1255 if (F->schur) { /* reset Schur status to unfactored */ 1256 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1257 mumps->id.ICNTL(19) = 2; 1258 ierr = MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);CHKERRQ(ierr); 1259 } 1260 ierr = MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);CHKERRQ(ierr); 1261 } 1262 1263 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 1264 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 1265 1266 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 1267 if (mumps->petsc_size > 1) { 1268 PetscInt lsol_loc; 1269 PetscScalar *sol_loc; 1270 1271 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr); 1272 1273 /* distributed solution; Create x_seq=sol_loc for repeated use */ 1274 if (mumps->x_seq) { 1275 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 1276 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 1277 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 1278 } 1279 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1280 ierr = PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);CHKERRQ(ierr); 1281 mumps->id.lsol_loc = lsol_loc; 1282 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1283 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr); 1284 } 1285 ierr = PetscLogFlops(mumps->id.RINFO(2));CHKERRQ(ierr); 1286 PetscFunctionReturn(0); 1287 } 1288 1289 /* Sets MUMPS options from the options database */ 1290 PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A) 1291 { 1292 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1293 PetscErrorCode ierr; 1294 PetscInt icntl,info[40],i,ninfo=40; 1295 PetscBool flg; 1296 1297 PetscFunctionBegin; 1298 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 1299 ierr = PetscOptionsInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr); 1300 if (flg) mumps->id.ICNTL(1) = icntl; 1301 ierr = PetscOptionsInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr); 1302 if (flg) mumps->id.ICNTL(2) = icntl; 1303 ierr = PetscOptionsInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr); 1304 if (flg) mumps->id.ICNTL(3) = icntl; 1305 1306 ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr); 1307 if (flg) mumps->id.ICNTL(4) = icntl; 1308 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 1309 1310 ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr); 1311 if (flg) mumps->id.ICNTL(6) = icntl; 1312 1313 ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr); 1314 if (flg) { 1315 if (icntl== 1 && mumps->petsc_size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n"); 1316 else mumps->id.ICNTL(7) = icntl; 1317 } 1318 1319 ierr = PetscOptionsInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr); 1320 /* ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL);CHKERRQ(ierr); handled by MatSolveTranspose_MUMPS() */ 1321 ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr); 1322 ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr); 1323 ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr); 1324 ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr); 1325 ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr); 1326 ierr = PetscOptionsInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr); 1327 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 1328 ierr = MatDestroy(&F->schur);CHKERRQ(ierr); 1329 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 1330 } 1331 /* ierr = PetscOptionsInt("-mat_mumps_icntl_20","ICNTL(20): the format (dense or sparse) of the right-hand sides","None",mumps->id.ICNTL(20),&mumps->id.ICNTL(20),NULL);CHKERRQ(ierr); -- sparse rhs is not supported in PETSc API */ 1332 /* ierr = PetscOptionsInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL);CHKERRQ(ierr); we only use distributed solution vector */ 1333 1334 ierr = PetscOptionsInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr); 1335 ierr = PetscOptionsInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr); 1336 ierr = PetscOptionsInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr); 1337 if (mumps->id.ICNTL(24)) { 1338 mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ 1339 } 1340 1341 ierr = PetscOptionsInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr); 1342 ierr = PetscOptionsInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr); 1343 ierr = PetscOptionsInt("-mat_mumps_icntl_27","ICNTL(27): the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr); 1344 ierr = PetscOptionsInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr); 1345 ierr = PetscOptionsInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr); 1346 /* ierr = PetscOptionsInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */ 1347 ierr = PetscOptionsInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr); 1348 /* ierr = PetscOptionsInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr); -- not supported by PETSc API */ 1349 ierr = PetscOptionsInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr); 1350 ierr = PetscOptionsInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Lock Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr); 1351 1352 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr); 1353 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr); 1354 ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr); 1355 ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr); 1356 ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr); 1357 ierr = PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);CHKERRQ(ierr); 1358 1359 ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, 256, NULL);CHKERRQ(ierr); 1360 1361 ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr); 1362 if (ninfo) { 1363 if (ninfo > 40) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %d must <= 40\n",ninfo); 1364 ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr); 1365 mumps->ninfo = ninfo; 1366 for (i=0; i<ninfo; i++) { 1367 if (info[i] < 0 || info[i]>40) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %d must between 1 and 40\n",ninfo); 1368 else mumps->info[i] = info[i]; 1369 } 1370 } 1371 1372 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1373 PetscFunctionReturn(0); 1374 } 1375 1376 PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps) 1377 { 1378 PetscErrorCode ierr; 1379 PetscInt nthreads=0; 1380 1381 PetscFunctionBegin; 1382 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1383 ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRQ(ierr); 1384 ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRQ(ierr); /* so that code like "if (!myid)" still works even if mumps_comm is different */ 1385 1386 ierr = PetscOptionsHasName(NULL,NULL,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr); 1387 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1388 ierr = PetscOptionsGetInt(NULL,NULL,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr); 1389 if (mumps->use_petsc_omp_support) { 1390 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1391 ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr); 1392 ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr); 1393 #else 1394 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -mat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual\n"); 1395 #endif 1396 } else { 1397 mumps->omp_comm = PETSC_COMM_SELF; 1398 mumps->mumps_comm = mumps->petsc_comm; 1399 mumps->is_omp_master = PETSC_TRUE; 1400 } 1401 ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRQ(ierr); 1402 1403 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 1404 mumps->id.job = JOB_INIT; 1405 mumps->id.par = 1; /* host participates factorizaton and solve */ 1406 mumps->id.sym = mumps->sym; 1407 1408 PetscMUMPS_c(mumps); 1409 1410 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 1411 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 1412 */ 1413 ierr = MPI_Bcast(mumps->id.icntl,40,MPIU_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 Manual Section 9 */ 1414 ierr = MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); 1415 1416 mumps->scat_rhs = NULL; 1417 mumps->scat_sol = NULL; 1418 1419 /* set PETSc-MUMPS default options - override MUMPS default */ 1420 mumps->id.ICNTL(3) = 0; 1421 mumps->id.ICNTL(4) = 0; 1422 if (mumps->petsc_size == 1) { 1423 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 1424 } else { 1425 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 1426 mumps->id.ICNTL(20) = 0; /* rhs is in dense format */ 1427 mumps->id.ICNTL(21) = 1; /* distributed solution */ 1428 } 1429 1430 /* schur */ 1431 mumps->id.size_schur = 0; 1432 mumps->id.listvar_schur = NULL; 1433 mumps->id.schur = NULL; 1434 mumps->sizeredrhs = 0; 1435 mumps->schur_sol = NULL; 1436 mumps->schur_sizesol = 0; 1437 PetscFunctionReturn(0); 1438 } 1439 1440 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps) 1441 { 1442 PetscErrorCode ierr; 1443 1444 PetscFunctionBegin; 1445 ierr = MPI_Bcast(mumps->id.infog, 40,MPIU_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 manual p82 */ 1446 ierr = MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); 1447 if (mumps->id.INFOG(1) < 0) { 1448 if (A->erroriffailure) { 1449 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1450 } else { 1451 if (mumps->id.INFOG(1) == -6) { 1452 ierr = PetscInfo2(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1453 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 1454 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 1455 ierr = PetscInfo2(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1456 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1457 } else { 1458 ierr = PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1459 F->factorerrortype = MAT_FACTOR_OTHER; 1460 } 1461 } 1462 } 1463 PetscFunctionReturn(0); 1464 } 1465 1466 /* Note Petsc r(=c) permutation is used when mumps->id.ICNTL(7)==1 with centralized assembled matrix input; otherwise r and c are ignored */ 1467 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1468 { 1469 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1470 PetscErrorCode ierr; 1471 Vec b; 1472 IS is_iden; 1473 const PetscInt M = A->rmap->N; 1474 1475 PetscFunctionBegin; 1476 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1477 1478 /* Set MUMPS options from the options database */ 1479 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1480 1481 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1482 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1483 1484 /* analysis phase */ 1485 /*----------------*/ 1486 mumps->id.job = JOB_FACTSYMBOLIC; 1487 mumps->id.n = M; 1488 switch (mumps->id.ICNTL(18)) { 1489 case 0: /* centralized assembled matrix input */ 1490 if (!mumps->myid) { 1491 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1492 if (mumps->id.ICNTL(6)>1) { 1493 mumps->id.a = (MumpsScalar*)mumps->val; 1494 } 1495 if (mumps->id.ICNTL(7) == 1) { /* use user-provide matrix ordering - assuming r = c ordering */ 1496 /* 1497 PetscBool flag; 1498 ierr = ISEqual(r,c,&flag);CHKERRQ(ierr); 1499 if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"row_perm != col_perm"); 1500 ierr = ISView(r,PETSC_VIEWER_STDOUT_SELF); 1501 */ 1502 if (!mumps->myid) { 1503 const PetscInt *idx; 1504 PetscInt i,*perm_in; 1505 1506 ierr = PetscMalloc1(M,&perm_in);CHKERRQ(ierr); 1507 ierr = ISGetIndices(r,&idx);CHKERRQ(ierr); 1508 1509 mumps->id.perm_in = perm_in; 1510 for (i=0; i<M; i++) perm_in[i] = idx[i]+1; /* perm_in[]: start from 1, not 0! */ 1511 ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr); 1512 } 1513 } 1514 } 1515 break; 1516 case 3: /* distributed assembled matrix input (size>1) */ 1517 mumps->id.nz_loc = mumps->nz; 1518 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1519 if (mumps->id.ICNTL(6)>1) { 1520 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1521 } 1522 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1523 if (!mumps->myid) { 1524 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->N,&mumps->b_seq);CHKERRQ(ierr); 1525 ierr = ISCreateStride(PETSC_COMM_SELF,A->rmap->N,0,1,&is_iden);CHKERRQ(ierr); 1526 } else { 1527 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&mumps->b_seq);CHKERRQ(ierr); 1528 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_iden);CHKERRQ(ierr); 1529 } 1530 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1531 ierr = VecScatterCreate(b,is_iden,mumps->b_seq,is_iden,&mumps->scat_rhs);CHKERRQ(ierr); 1532 ierr = ISDestroy(&is_iden);CHKERRQ(ierr); 1533 ierr = VecDestroy(&b);CHKERRQ(ierr); 1534 break; 1535 } 1536 PetscMUMPS_c(mumps); 1537 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1538 1539 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1540 F->ops->solve = MatSolve_MUMPS; 1541 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1542 F->ops->matsolve = MatMatSolve_MUMPS; 1543 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1544 PetscFunctionReturn(0); 1545 } 1546 1547 /* Note the Petsc r and c permutations are ignored */ 1548 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1549 { 1550 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1551 PetscErrorCode ierr; 1552 Vec b; 1553 IS is_iden; 1554 const PetscInt M = A->rmap->N; 1555 1556 PetscFunctionBegin; 1557 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1558 1559 /* Set MUMPS options from the options database */ 1560 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1561 1562 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1563 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1564 1565 /* analysis phase */ 1566 /*----------------*/ 1567 mumps->id.job = JOB_FACTSYMBOLIC; 1568 mumps->id.n = M; 1569 switch (mumps->id.ICNTL(18)) { 1570 case 0: /* centralized assembled matrix input */ 1571 if (!mumps->myid) { 1572 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1573 if (mumps->id.ICNTL(6)>1) { 1574 mumps->id.a = (MumpsScalar*)mumps->val; 1575 } 1576 } 1577 break; 1578 case 3: /* distributed assembled matrix input (size>1) */ 1579 mumps->id.nz_loc = mumps->nz; 1580 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1581 if (mumps->id.ICNTL(6)>1) { 1582 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1583 } 1584 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1585 if (!mumps->myid) { 1586 ierr = VecCreateSeq(PETSC_COMM_SELF,A->cmap->N,&mumps->b_seq);CHKERRQ(ierr); 1587 ierr = ISCreateStride(PETSC_COMM_SELF,A->cmap->N,0,1,&is_iden);CHKERRQ(ierr); 1588 } else { 1589 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&mumps->b_seq);CHKERRQ(ierr); 1590 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_iden);CHKERRQ(ierr); 1591 } 1592 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1593 ierr = VecScatterCreate(b,is_iden,mumps->b_seq,is_iden,&mumps->scat_rhs);CHKERRQ(ierr); 1594 ierr = ISDestroy(&is_iden);CHKERRQ(ierr); 1595 ierr = VecDestroy(&b);CHKERRQ(ierr); 1596 break; 1597 } 1598 PetscMUMPS_c(mumps); 1599 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1600 1601 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1602 F->ops->solve = MatSolve_MUMPS; 1603 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1604 PetscFunctionReturn(0); 1605 } 1606 1607 /* Note the Petsc r permutation and factor info are ignored */ 1608 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info) 1609 { 1610 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1611 PetscErrorCode ierr; 1612 Vec b; 1613 IS is_iden; 1614 const PetscInt M = A->rmap->N; 1615 1616 PetscFunctionBegin; 1617 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1618 1619 /* Set MUMPS options from the options database */ 1620 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1621 1622 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1623 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1624 1625 /* analysis phase */ 1626 /*----------------*/ 1627 mumps->id.job = JOB_FACTSYMBOLIC; 1628 mumps->id.n = M; 1629 switch (mumps->id.ICNTL(18)) { 1630 case 0: /* centralized assembled matrix input */ 1631 if (!mumps->myid) { 1632 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1633 if (mumps->id.ICNTL(6)>1) { 1634 mumps->id.a = (MumpsScalar*)mumps->val; 1635 } 1636 } 1637 break; 1638 case 3: /* distributed assembled matrix input (size>1) */ 1639 mumps->id.nz_loc = mumps->nz; 1640 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1641 if (mumps->id.ICNTL(6)>1) { 1642 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1643 } 1644 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1645 if (!mumps->myid) { 1646 ierr = VecCreateSeq(PETSC_COMM_SELF,A->cmap->N,&mumps->b_seq);CHKERRQ(ierr); 1647 ierr = ISCreateStride(PETSC_COMM_SELF,A->cmap->N,0,1,&is_iden);CHKERRQ(ierr); 1648 } else { 1649 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&mumps->b_seq);CHKERRQ(ierr); 1650 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_iden);CHKERRQ(ierr); 1651 } 1652 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1653 ierr = VecScatterCreate(b,is_iden,mumps->b_seq,is_iden,&mumps->scat_rhs);CHKERRQ(ierr); 1654 ierr = ISDestroy(&is_iden);CHKERRQ(ierr); 1655 ierr = VecDestroy(&b);CHKERRQ(ierr); 1656 break; 1657 } 1658 PetscMUMPS_c(mumps); 1659 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1660 1661 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 1662 F->ops->solve = MatSolve_MUMPS; 1663 F->ops->solvetranspose = MatSolve_MUMPS; 1664 F->ops->matsolve = MatMatSolve_MUMPS; 1665 #if defined(PETSC_USE_COMPLEX) 1666 F->ops->getinertia = NULL; 1667 #else 1668 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 1669 #endif 1670 PetscFunctionReturn(0); 1671 } 1672 1673 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) 1674 { 1675 PetscErrorCode ierr; 1676 PetscBool iascii; 1677 PetscViewerFormat format; 1678 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1679 1680 PetscFunctionBegin; 1681 /* check if matrix is mumps type */ 1682 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0); 1683 1684 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1685 if (iascii) { 1686 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1687 if (format == PETSC_VIEWER_ASCII_INFO) { 1688 ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr); 1689 ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",mumps->id.sym);CHKERRQ(ierr); 1690 ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",mumps->id.par);CHKERRQ(ierr); 1691 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d \n",mumps->id.ICNTL(1));CHKERRQ(ierr); 1692 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d \n",mumps->id.ICNTL(2));CHKERRQ(ierr); 1693 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d \n",mumps->id.ICNTL(3));CHKERRQ(ierr); 1694 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",mumps->id.ICNTL(4));CHKERRQ(ierr); 1695 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",mumps->id.ICNTL(5));CHKERRQ(ierr); 1696 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",mumps->id.ICNTL(6));CHKERRQ(ierr); 1697 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d \n",mumps->id.ICNTL(7));CHKERRQ(ierr); 1698 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d \n",mumps->id.ICNTL(8));CHKERRQ(ierr); 1699 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",mumps->id.ICNTL(10));CHKERRQ(ierr); 1700 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",mumps->id.ICNTL(11));CHKERRQ(ierr); 1701 if (mumps->id.ICNTL(11)>0) { 1702 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr); 1703 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr); 1704 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6));CHKERRQ(ierr); 1705 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr); 1706 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr); 1707 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr); 1708 } 1709 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",mumps->id.ICNTL(12));CHKERRQ(ierr); 1710 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (efficiency control): %d \n",mumps->id.ICNTL(13));CHKERRQ(ierr); 1711 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",mumps->id.ICNTL(14));CHKERRQ(ierr); 1712 /* ICNTL(15-17) not used */ 1713 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",mumps->id.ICNTL(18));CHKERRQ(ierr); 1714 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d \n",mumps->id.ICNTL(19));CHKERRQ(ierr); 1715 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(20) (rhs sparse pattern): %d \n",mumps->id.ICNTL(20));CHKERRQ(ierr); 1716 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d \n",mumps->id.ICNTL(21));CHKERRQ(ierr); 1717 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d \n",mumps->id.ICNTL(22));CHKERRQ(ierr); 1718 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory can be allocated locally):%d \n",mumps->id.ICNTL(23));CHKERRQ(ierr); 1719 1720 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d \n",mumps->id.ICNTL(24));CHKERRQ(ierr); 1721 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d \n",mumps->id.ICNTL(25));CHKERRQ(ierr); 1722 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for rhs or solution): %d \n",mumps->id.ICNTL(26));CHKERRQ(ierr); 1723 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(27) (experimental parameter): %d \n",mumps->id.ICNTL(27));CHKERRQ(ierr); 1724 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d \n",mumps->id.ICNTL(28));CHKERRQ(ierr); 1725 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d \n",mumps->id.ICNTL(29));CHKERRQ(ierr); 1726 1727 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d \n",mumps->id.ICNTL(30));CHKERRQ(ierr); 1728 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(31) (factors is discarded in the solve phase): %d \n",mumps->id.ICNTL(31));CHKERRQ(ierr); 1729 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d \n",mumps->id.ICNTL(33));CHKERRQ(ierr); 1730 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d \n",mumps->id.ICNTL(35));CHKERRQ(ierr); 1731 1732 ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1));CHKERRQ(ierr); 1733 ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr); 1734 ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3));CHKERRQ(ierr); 1735 ierr = PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4));CHKERRQ(ierr); 1736 ierr = PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5));CHKERRQ(ierr); 1737 ierr = PetscViewerASCIIPrintf(viewer," CNTL(7) (dropping parameter for BLR): %g \n",mumps->id.CNTL(7));CHKERRQ(ierr); 1738 1739 /* infomation local to each processor */ 1740 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr); 1741 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1742 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr); 1743 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1744 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr); 1745 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr); 1746 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1747 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr); 1748 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr); 1749 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1750 1751 ierr = PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr); 1752 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr); 1753 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1754 1755 ierr = PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr); 1756 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr); 1757 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1758 1759 ierr = PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr); 1760 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr); 1761 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1762 1763 if (mumps->ninfo && mumps->ninfo <= 40){ 1764 PetscInt i; 1765 for (i=0; i<mumps->ninfo; i++){ 1766 ierr = PetscViewerASCIIPrintf(viewer, " INFO(%d): \n",mumps->info[i]);CHKERRQ(ierr); 1767 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(mumps->info[i]));CHKERRQ(ierr); 1768 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1769 } 1770 } 1771 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1772 1773 if (!mumps->myid) { /* information from the host */ 1774 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr); 1775 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr); 1776 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr); 1777 ierr = PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr); 1778 1779 ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(3));CHKERRQ(ierr); 1780 ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(4));CHKERRQ(ierr); 1781 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",mumps->id.INFOG(5));CHKERRQ(ierr); 1782 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",mumps->id.INFOG(6));CHKERRQ(ierr); 1783 ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively use after analysis): %d \n",mumps->id.INFOG(7));CHKERRQ(ierr); 1784 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",mumps->id.INFOG(8));CHKERRQ(ierr); 1785 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",mumps->id.INFOG(9));CHKERRQ(ierr); 1786 ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d \n",mumps->id.INFOG(10));CHKERRQ(ierr); 1787 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d \n",mumps->id.INFOG(11));CHKERRQ(ierr); 1788 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",mumps->id.INFOG(12));CHKERRQ(ierr); 1789 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",mumps->id.INFOG(13));CHKERRQ(ierr); 1790 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",mumps->id.INFOG(14));CHKERRQ(ierr); 1791 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",mumps->id.INFOG(15));CHKERRQ(ierr); 1792 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",mumps->id.INFOG(16));CHKERRQ(ierr); 1793 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",mumps->id.INFOG(17));CHKERRQ(ierr); 1794 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",mumps->id.INFOG(18));CHKERRQ(ierr); 1795 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",mumps->id.INFOG(19));CHKERRQ(ierr); 1796 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",mumps->id.INFOG(20));CHKERRQ(ierr); 1797 ierr = PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",mumps->id.INFOG(21));CHKERRQ(ierr); 1798 ierr = PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",mumps->id.INFOG(22));CHKERRQ(ierr); 1799 ierr = PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",mumps->id.INFOG(23));CHKERRQ(ierr); 1800 ierr = PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",mumps->id.INFOG(24));CHKERRQ(ierr); 1801 ierr = PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",mumps->id.INFOG(25));CHKERRQ(ierr); 1802 ierr = PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));CHKERRQ(ierr); 1803 ierr = PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));CHKERRQ(ierr); 1804 ierr = PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));CHKERRQ(ierr); 1805 ierr = PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));CHKERRQ(ierr); 1806 ierr = PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));CHKERRQ(ierr); 1807 ierr = PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));CHKERRQ(ierr); 1808 } 1809 } 1810 } 1811 PetscFunctionReturn(0); 1812 } 1813 1814 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info) 1815 { 1816 Mat_MUMPS *mumps =(Mat_MUMPS*)A->data; 1817 1818 PetscFunctionBegin; 1819 info->block_size = 1.0; 1820 info->nz_allocated = mumps->id.INFOG(20); 1821 info->nz_used = mumps->id.INFOG(20); 1822 info->nz_unneeded = 0.0; 1823 info->assemblies = 0.0; 1824 info->mallocs = 0.0; 1825 info->memory = 0.0; 1826 info->fill_ratio_given = 0; 1827 info->fill_ratio_needed = 0; 1828 info->factor_mallocs = 0; 1829 PetscFunctionReturn(0); 1830 } 1831 1832 /* -------------------------------------------------------------------------------------------*/ 1833 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 1834 { 1835 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1836 const PetscInt *idxs; 1837 PetscInt size,i; 1838 PetscErrorCode ierr; 1839 1840 PetscFunctionBegin; 1841 ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr); 1842 if (mumps->petsc_size > 1) { 1843 PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */ 1844 1845 ls = mumps->myid ? (size ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 1846 ierr = MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);CHKERRQ(ierr); 1847 if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc\n"); 1848 } 1849 if (mumps->id.size_schur != size) { 1850 ierr = PetscFree2(mumps->id.listvar_schur,mumps->id.schur);CHKERRQ(ierr); 1851 mumps->id.size_schur = size; 1852 mumps->id.schur_lld = size; 1853 ierr = PetscMalloc2(size,&mumps->id.listvar_schur,size*size,&mumps->id.schur);CHKERRQ(ierr); 1854 } 1855 1856 /* Schur complement matrix */ 1857 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.size_schur,(PetscScalar*)mumps->id.schur,&F->schur);CHKERRQ(ierr); 1858 if (mumps->sym == 1) { 1859 ierr = MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 1860 } 1861 1862 /* MUMPS expects Fortran style indices */ 1863 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 1864 ierr = PetscMemcpy(mumps->id.listvar_schur,idxs,size*sizeof(PetscInt));CHKERRQ(ierr); 1865 for (i=0;i<size;i++) mumps->id.listvar_schur[i]++; 1866 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 1867 if (mumps->petsc_size > 1) { 1868 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 1869 } else { 1870 if (F->factortype == MAT_FACTOR_LU) { 1871 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 1872 } else { 1873 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 1874 } 1875 } 1876 /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 1877 mumps->id.ICNTL(26) = -1; 1878 PetscFunctionReturn(0); 1879 } 1880 1881 /* -------------------------------------------------------------------------------------------*/ 1882 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S) 1883 { 1884 Mat St; 1885 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1886 PetscScalar *array; 1887 #if defined(PETSC_USE_COMPLEX) 1888 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 1889 #endif 1890 PetscErrorCode ierr; 1891 1892 PetscFunctionBegin; 1893 if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 1894 ierr = MatCreate(PETSC_COMM_SELF,&St);CHKERRQ(ierr); 1895 ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);CHKERRQ(ierr); 1896 ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr); 1897 ierr = MatSetUp(St);CHKERRQ(ierr); 1898 ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr); 1899 if (!mumps->sym) { /* MUMPS always return a full matrix */ 1900 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1901 PetscInt i,j,N=mumps->id.size_schur; 1902 for (i=0;i<N;i++) { 1903 for (j=0;j<N;j++) { 1904 #if !defined(PETSC_USE_COMPLEX) 1905 PetscScalar val = mumps->id.schur[i*N+j]; 1906 #else 1907 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 1908 #endif 1909 array[j*N+i] = val; 1910 } 1911 } 1912 } else { /* stored by columns */ 1913 ierr = PetscMemcpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur*sizeof(PetscScalar));CHKERRQ(ierr); 1914 } 1915 } else { /* either full or lower-triangular (not packed) */ 1916 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 1917 PetscInt i,j,N=mumps->id.size_schur; 1918 for (i=0;i<N;i++) { 1919 for (j=i;j<N;j++) { 1920 #if !defined(PETSC_USE_COMPLEX) 1921 PetscScalar val = mumps->id.schur[i*N+j]; 1922 #else 1923 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 1924 #endif 1925 array[i*N+j] = val; 1926 array[j*N+i] = val; 1927 } 1928 } 1929 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 1930 ierr = PetscMemcpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur*sizeof(PetscScalar));CHKERRQ(ierr); 1931 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 1932 PetscInt i,j,N=mumps->id.size_schur; 1933 for (i=0;i<N;i++) { 1934 for (j=0;j<i+1;j++) { 1935 #if !defined(PETSC_USE_COMPLEX) 1936 PetscScalar val = mumps->id.schur[i*N+j]; 1937 #else 1938 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 1939 #endif 1940 array[i*N+j] = val; 1941 array[j*N+i] = val; 1942 } 1943 } 1944 } 1945 } 1946 ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr); 1947 *S = St; 1948 PetscFunctionReturn(0); 1949 } 1950 1951 /* -------------------------------------------------------------------------------------------*/ 1952 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival) 1953 { 1954 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1955 1956 PetscFunctionBegin; 1957 mumps->id.ICNTL(icntl) = ival; 1958 PetscFunctionReturn(0); 1959 } 1960 1961 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival) 1962 { 1963 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1964 1965 PetscFunctionBegin; 1966 *ival = mumps->id.ICNTL(icntl); 1967 PetscFunctionReturn(0); 1968 } 1969 1970 /*@ 1971 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 1972 1973 Logically Collective on Mat 1974 1975 Input Parameters: 1976 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 1977 . icntl - index of MUMPS parameter array ICNTL() 1978 - ival - value of MUMPS ICNTL(icntl) 1979 1980 Options Database: 1981 . -mat_mumps_icntl_<icntl> <ival> 1982 1983 Level: beginner 1984 1985 References: 1986 . MUMPS Users' Guide 1987 1988 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 1989 @*/ 1990 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival) 1991 { 1992 PetscErrorCode ierr; 1993 1994 PetscFunctionBegin; 1995 PetscValidType(F,1); 1996 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 1997 PetscValidLogicalCollectiveInt(F,icntl,2); 1998 PetscValidLogicalCollectiveInt(F,ival,3); 1999 ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr); 2000 PetscFunctionReturn(0); 2001 } 2002 2003 /*@ 2004 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2005 2006 Logically Collective on Mat 2007 2008 Input Parameters: 2009 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2010 - icntl - index of MUMPS parameter array ICNTL() 2011 2012 Output Parameter: 2013 . ival - value of MUMPS ICNTL(icntl) 2014 2015 Level: beginner 2016 2017 References: 2018 . MUMPS Users' Guide 2019 2020 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2021 @*/ 2022 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival) 2023 { 2024 PetscErrorCode ierr; 2025 2026 PetscFunctionBegin; 2027 PetscValidType(F,1); 2028 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2029 PetscValidLogicalCollectiveInt(F,icntl,2); 2030 PetscValidIntPointer(ival,3); 2031 ierr = PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2032 PetscFunctionReturn(0); 2033 } 2034 2035 /* -------------------------------------------------------------------------------------------*/ 2036 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val) 2037 { 2038 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2039 2040 PetscFunctionBegin; 2041 mumps->id.CNTL(icntl) = val; 2042 PetscFunctionReturn(0); 2043 } 2044 2045 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val) 2046 { 2047 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2048 2049 PetscFunctionBegin; 2050 *val = mumps->id.CNTL(icntl); 2051 PetscFunctionReturn(0); 2052 } 2053 2054 /*@ 2055 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2056 2057 Logically Collective on Mat 2058 2059 Input Parameters: 2060 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2061 . icntl - index of MUMPS parameter array CNTL() 2062 - val - value of MUMPS CNTL(icntl) 2063 2064 Options Database: 2065 . -mat_mumps_cntl_<icntl> <val> 2066 2067 Level: beginner 2068 2069 References: 2070 . MUMPS Users' Guide 2071 2072 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2073 @*/ 2074 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val) 2075 { 2076 PetscErrorCode ierr; 2077 2078 PetscFunctionBegin; 2079 PetscValidType(F,1); 2080 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2081 PetscValidLogicalCollectiveInt(F,icntl,2); 2082 PetscValidLogicalCollectiveReal(F,val,3); 2083 ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr); 2084 PetscFunctionReturn(0); 2085 } 2086 2087 /*@ 2088 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2089 2090 Logically Collective on Mat 2091 2092 Input Parameters: 2093 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2094 - icntl - index of MUMPS parameter array CNTL() 2095 2096 Output Parameter: 2097 . val - value of MUMPS CNTL(icntl) 2098 2099 Level: beginner 2100 2101 References: 2102 . MUMPS Users' Guide 2103 2104 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2105 @*/ 2106 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val) 2107 { 2108 PetscErrorCode ierr; 2109 2110 PetscFunctionBegin; 2111 PetscValidType(F,1); 2112 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2113 PetscValidLogicalCollectiveInt(F,icntl,2); 2114 PetscValidRealPointer(val,3); 2115 ierr = PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2116 PetscFunctionReturn(0); 2117 } 2118 2119 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info) 2120 { 2121 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2122 2123 PetscFunctionBegin; 2124 *info = mumps->id.INFO(icntl); 2125 PetscFunctionReturn(0); 2126 } 2127 2128 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog) 2129 { 2130 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2131 2132 PetscFunctionBegin; 2133 *infog = mumps->id.INFOG(icntl); 2134 PetscFunctionReturn(0); 2135 } 2136 2137 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo) 2138 { 2139 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2140 2141 PetscFunctionBegin; 2142 *rinfo = mumps->id.RINFO(icntl); 2143 PetscFunctionReturn(0); 2144 } 2145 2146 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog) 2147 { 2148 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2149 2150 PetscFunctionBegin; 2151 *rinfog = mumps->id.RINFOG(icntl); 2152 PetscFunctionReturn(0); 2153 } 2154 2155 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS) 2156 { 2157 PetscErrorCode ierr; 2158 Mat Bt = NULL,Btseq = NULL; 2159 PetscBool flg; 2160 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2161 PetscScalar *aa; 2162 PetscInt spnr,*ia,*ja; 2163 2164 PetscFunctionBegin; 2165 PetscValidIntPointer(spRHS,2); 2166 ierr = PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);CHKERRQ(ierr); 2167 if (flg) { 2168 ierr = MatTransposeGetMat(spRHS,&Bt);CHKERRQ(ierr); 2169 } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be type MATTRANSPOSEMAT matrix"); 2170 2171 ierr = MatMumpsSetIcntl(F,30,1);CHKERRQ(ierr); 2172 2173 if (mumps->petsc_size > 1) { 2174 Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data; 2175 Btseq = b->A; 2176 } else { 2177 Btseq = Bt; 2178 } 2179 2180 if (!mumps->myid) { 2181 ierr = MatSeqAIJGetArray(Btseq,&aa);CHKERRQ(ierr); 2182 ierr = MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2183 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2184 2185 mumps->id.irhs_ptr = ia; 2186 mumps->id.irhs_sparse = ja; 2187 mumps->id.nz_rhs = ia[spnr] - 1; 2188 mumps->id.rhs_sparse = (MumpsScalar*)aa; 2189 } else { 2190 mumps->id.irhs_ptr = NULL; 2191 mumps->id.irhs_sparse = NULL; 2192 mumps->id.nz_rhs = 0; 2193 mumps->id.rhs_sparse = NULL; 2194 } 2195 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2196 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2197 2198 /* solve phase */ 2199 /*-------------*/ 2200 mumps->id.job = JOB_SOLVE; 2201 PetscMUMPS_c(mumps); 2202 if (mumps->id.INFOG(1) < 0) 2203 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 2204 2205 if (!mumps->myid) { 2206 ierr = MatSeqAIJRestoreArray(Btseq,&aa);CHKERRQ(ierr); 2207 ierr = MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2208 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2209 } 2210 PetscFunctionReturn(0); 2211 } 2212 2213 /*@ 2214 MatMumpsGetInverse - Get user-specified set of entries in inverse of A 2215 2216 Logically Collective on Mat 2217 2218 Input Parameters: 2219 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2220 - spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0] 2221 2222 Output Parameter: 2223 . spRHS - requested entries of inverse of A 2224 2225 Level: beginner 2226 2227 References: 2228 . MUMPS Users' Guide 2229 2230 .seealso: MatGetFactor(), MatCreateTranspose() 2231 @*/ 2232 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS) 2233 { 2234 PetscErrorCode ierr; 2235 2236 PetscFunctionBegin; 2237 PetscValidType(F,1); 2238 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2239 ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr); 2240 PetscFunctionReturn(0); 2241 } 2242 2243 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST) 2244 { 2245 PetscErrorCode ierr; 2246 Mat spRHS; 2247 2248 PetscFunctionBegin; 2249 ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr); 2250 ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr); 2251 ierr = MatDestroy(&spRHS);CHKERRQ(ierr); 2252 PetscFunctionReturn(0); 2253 } 2254 2255 /*@ 2256 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T 2257 2258 Logically Collective on Mat 2259 2260 Input Parameters: 2261 + F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface 2262 - spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0] 2263 2264 Output Parameter: 2265 . spRHST - requested entries of inverse of A^T 2266 2267 Level: beginner 2268 2269 References: 2270 . MUMPS Users' Guide 2271 2272 .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse() 2273 @*/ 2274 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST) 2275 { 2276 PetscErrorCode ierr; 2277 PetscBool flg; 2278 2279 PetscFunctionBegin; 2280 PetscValidType(F,1); 2281 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2282 ierr = PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 2283 if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix"); 2284 2285 ierr = PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));CHKERRQ(ierr); 2286 PetscFunctionReturn(0); 2287 } 2288 2289 /*@ 2290 MatMumpsGetInfo - Get MUMPS parameter INFO() 2291 2292 Logically Collective on Mat 2293 2294 Input Parameters: 2295 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2296 - icntl - index of MUMPS parameter array INFO() 2297 2298 Output Parameter: 2299 . ival - value of MUMPS INFO(icntl) 2300 2301 Level: beginner 2302 2303 References: 2304 . MUMPS Users' Guide 2305 2306 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2307 @*/ 2308 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival) 2309 { 2310 PetscErrorCode ierr; 2311 2312 PetscFunctionBegin; 2313 PetscValidType(F,1); 2314 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2315 PetscValidIntPointer(ival,3); 2316 ierr = PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2317 PetscFunctionReturn(0); 2318 } 2319 2320 /*@ 2321 MatMumpsGetInfog - Get MUMPS parameter INFOG() 2322 2323 Logically Collective on Mat 2324 2325 Input Parameters: 2326 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2327 - icntl - index of MUMPS parameter array INFOG() 2328 2329 Output Parameter: 2330 . ival - value of MUMPS INFOG(icntl) 2331 2332 Level: beginner 2333 2334 References: 2335 . MUMPS Users' Guide 2336 2337 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2338 @*/ 2339 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival) 2340 { 2341 PetscErrorCode ierr; 2342 2343 PetscFunctionBegin; 2344 PetscValidType(F,1); 2345 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2346 PetscValidIntPointer(ival,3); 2347 ierr = PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2348 PetscFunctionReturn(0); 2349 } 2350 2351 /*@ 2352 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 2353 2354 Logically Collective on Mat 2355 2356 Input Parameters: 2357 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2358 - icntl - index of MUMPS parameter array RINFO() 2359 2360 Output Parameter: 2361 . val - value of MUMPS RINFO(icntl) 2362 2363 Level: beginner 2364 2365 References: 2366 . MUMPS Users' Guide 2367 2368 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2369 @*/ 2370 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val) 2371 { 2372 PetscErrorCode ierr; 2373 2374 PetscFunctionBegin; 2375 PetscValidType(F,1); 2376 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2377 PetscValidRealPointer(val,3); 2378 ierr = PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2379 PetscFunctionReturn(0); 2380 } 2381 2382 /*@ 2383 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 2384 2385 Logically Collective on Mat 2386 2387 Input Parameters: 2388 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2389 - icntl - index of MUMPS parameter array RINFOG() 2390 2391 Output Parameter: 2392 . val - value of MUMPS RINFOG(icntl) 2393 2394 Level: beginner 2395 2396 References: 2397 . MUMPS Users' Guide 2398 2399 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2400 @*/ 2401 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val) 2402 { 2403 PetscErrorCode ierr; 2404 2405 PetscFunctionBegin; 2406 PetscValidType(F,1); 2407 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2408 PetscValidRealPointer(val,3); 2409 ierr = PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2410 PetscFunctionReturn(0); 2411 } 2412 2413 /*MC 2414 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 2415 distributed and sequential matrices via the external package MUMPS. 2416 2417 Works with MATAIJ and MATSBAIJ matrices 2418 2419 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 2420 2421 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below. 2422 2423 Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver 2424 2425 Options Database Keys: 2426 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 2427 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 2428 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 2429 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 2430 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 2431 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis 2432 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 2433 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 2434 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 2435 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 2436 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 2437 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 2438 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 2439 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 2440 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can allocate per processor 2441 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 2442 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 2443 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix 2444 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering 2445 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 2446 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 2447 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 2448 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 2449 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 2450 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 2451 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 2452 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 2453 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 2454 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 2455 Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 2456 2457 Level: beginner 2458 2459 Notes: 2460 When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PCSETUP_FAILED, one can find the MUMPS information about the failure by calling 2461 $ KSPGetPC(ksp,&pc); 2462 $ PCFactorGetMatrix(pc,&mat); 2463 $ MatMumpsGetInfo(mat,....); 2464 $ MatMumpsGetInfog(mat,....); etc. 2465 Or you can run with -ksp_error_if_not_converged and the program will be stopped and the information printed in the error message. 2466 2467 If you want to run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still want to run the non-MUMPS part 2468 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc 2469 (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 2470 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or open sourced 2471 OpenBLAS (PETSc has configure options to install/specify them). With these conditions met, you can run your program as before but with 2472 an extra option -mat_mumps_use_omp_threads [m]. It works as if we set OMP_NUM_THREADS=m to MUMPS, with m defaults to the number of cores 2473 per CPU socket (or package, in hwloc term), or number of PETSc MPI processes on a node, whichever is smaller. 2474 2475 By flat-MPI or pure-MPI mode, it means you run your code with as many MPI ranks as the number of cores. For example, 2476 if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test". To run MPI+OpenMP hybrid MUMPS, 2477 the tranditional way is to set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 2478 threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". The problem of this approach is that the non-MUMPS 2479 part of your code is run with fewer cores and CPUs are wasted. "-mat_mumps_use_omp_threads [m]" provides an alternative such that 2480 you can stil run your code with as many MPI ranks as the number of cores, but have MUMPS run in MPI+OpenMP hybrid mode. In our example, 2481 you can use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16". 2482 2483 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type to get MPI 2484 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 2485 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 2486 are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set 2487 by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs. 2488 In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets, 2489 if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind 2490 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half 2491 cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 2492 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 2493 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbsoe -m block:block to map consecutive MPI ranks to sockets and 2494 examine the mapping result. 2495 2496 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts, 2497 for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc 2498 calls omp_set_num_threads(m) internally before calling MUMPS. 2499 2500 References: 2501 + 1. - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 2502 - 2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017. 2503 2504 .seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCGetFactor(), PCFactorGetMatrix() 2505 2506 M*/ 2507 2508 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type) 2509 { 2510 PetscFunctionBegin; 2511 *type = MATSOLVERMUMPS; 2512 PetscFunctionReturn(0); 2513 } 2514 2515 /* MatGetFactor for Seq and MPI AIJ matrices */ 2516 static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F) 2517 { 2518 Mat B; 2519 PetscErrorCode ierr; 2520 Mat_MUMPS *mumps; 2521 PetscBool isSeqAIJ; 2522 2523 PetscFunctionBegin; 2524 /* Create the factorization matrix */ 2525 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr); 2526 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2527 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2528 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2529 ierr = MatSetUp(B);CHKERRQ(ierr); 2530 2531 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2532 2533 B->ops->view = MatView_MUMPS; 2534 B->ops->getinfo = MatGetInfo_MUMPS; 2535 2536 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2537 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2538 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2539 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2540 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2541 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2542 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2543 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2544 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2545 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2546 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2547 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2548 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2549 2550 if (ftype == MAT_FACTOR_LU) { 2551 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 2552 B->factortype = MAT_FACTOR_LU; 2553 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 2554 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 2555 mumps->sym = 0; 2556 } else { 2557 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 2558 B->factortype = MAT_FACTOR_CHOLESKY; 2559 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 2560 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 2561 #if defined(PETSC_USE_COMPLEX) 2562 mumps->sym = 2; 2563 #else 2564 if (A->spd_set && A->spd) mumps->sym = 1; 2565 else mumps->sym = 2; 2566 #endif 2567 } 2568 2569 /* set solvertype */ 2570 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2571 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2572 2573 B->ops->destroy = MatDestroy_MUMPS; 2574 B->data = (void*)mumps; 2575 2576 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2577 2578 *F = B; 2579 PetscFunctionReturn(0); 2580 } 2581 2582 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 2583 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F) 2584 { 2585 Mat B; 2586 PetscErrorCode ierr; 2587 Mat_MUMPS *mumps; 2588 PetscBool isSeqSBAIJ; 2589 2590 PetscFunctionBegin; 2591 if (ftype != MAT_FACTOR_CHOLESKY) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Cannot use PETSc SBAIJ matrices with MUMPS LU, use AIJ matrix"); 2592 if (A->rmap->bs > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Cannot use PETSc SBAIJ matrices with block size > 1 with MUMPS Cholesky, use AIJ matrix instead"); 2593 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr); 2594 /* Create the factorization matrix */ 2595 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2596 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2597 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2598 ierr = MatSetUp(B);CHKERRQ(ierr); 2599 2600 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2601 if (isSeqSBAIJ) { 2602 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 2603 } else { 2604 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 2605 } 2606 2607 B->ops->getinfo = MatGetInfo_External; 2608 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 2609 B->ops->view = MatView_MUMPS; 2610 2611 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2612 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2613 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2614 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2615 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2616 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2617 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2618 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2619 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2620 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2621 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2622 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2623 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2624 2625 B->factortype = MAT_FACTOR_CHOLESKY; 2626 #if defined(PETSC_USE_COMPLEX) 2627 mumps->sym = 2; 2628 #else 2629 if (A->spd_set && A->spd) mumps->sym = 1; 2630 else mumps->sym = 2; 2631 #endif 2632 2633 /* set solvertype */ 2634 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2635 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2636 2637 B->ops->destroy = MatDestroy_MUMPS; 2638 B->data = (void*)mumps; 2639 2640 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2641 2642 *F = B; 2643 PetscFunctionReturn(0); 2644 } 2645 2646 static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F) 2647 { 2648 Mat B; 2649 PetscErrorCode ierr; 2650 Mat_MUMPS *mumps; 2651 PetscBool isSeqBAIJ; 2652 2653 PetscFunctionBegin; 2654 /* Create the factorization matrix */ 2655 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr); 2656 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2657 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2658 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2659 ierr = MatSetUp(B);CHKERRQ(ierr); 2660 2661 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2662 if (ftype == MAT_FACTOR_LU) { 2663 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 2664 B->factortype = MAT_FACTOR_LU; 2665 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 2666 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 2667 mumps->sym = 0; 2668 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead\n"); 2669 2670 B->ops->getinfo = MatGetInfo_External; 2671 B->ops->view = MatView_MUMPS; 2672 2673 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2674 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2675 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2676 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2677 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2678 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2679 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2680 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2681 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2682 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2683 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2684 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2685 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2686 2687 /* set solvertype */ 2688 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2689 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2690 2691 B->ops->destroy = MatDestroy_MUMPS; 2692 B->data = (void*)mumps; 2693 2694 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2695 2696 *F = B; 2697 PetscFunctionReturn(0); 2698 } 2699 2700 /* MatGetFactor for Seq and MPI SELL matrices */ 2701 static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F) 2702 { 2703 Mat B; 2704 PetscErrorCode ierr; 2705 Mat_MUMPS *mumps; 2706 PetscBool isSeqSELL; 2707 2708 PetscFunctionBegin; 2709 /* Create the factorization matrix */ 2710 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);CHKERRQ(ierr); 2711 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2712 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2713 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2714 ierr = MatSetUp(B);CHKERRQ(ierr); 2715 2716 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2717 2718 B->ops->view = MatView_MUMPS; 2719 B->ops->getinfo = MatGetInfo_MUMPS; 2720 2721 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2722 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2723 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2724 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2725 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2726 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2727 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2728 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2729 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2730 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2731 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2732 2733 if (ftype == MAT_FACTOR_LU) { 2734 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 2735 B->factortype = MAT_FACTOR_LU; 2736 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 2737 else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 2738 mumps->sym = 0; 2739 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 2740 2741 /* set solvertype */ 2742 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2743 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2744 2745 B->ops->destroy = MatDestroy_MUMPS; 2746 B->data = (void*)mumps; 2747 2748 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2749 2750 *F = B; 2751 PetscFunctionReturn(0); 2752 } 2753 2754 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 2755 { 2756 PetscErrorCode ierr; 2757 2758 PetscFunctionBegin; 2759 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2760 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2761 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2762 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2763 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 2764 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2765 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2766 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2767 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2768 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 2769 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);CHKERRQ(ierr); 2770 PetscFunctionReturn(0); 2771 } 2772 2773 #undef PETSC_HAVE_OPENMP_SUPPORT 2774 2775