1 2 /* 3 Provides an interface to the MUMPS sparse solver 4 */ 5 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 8 #include <../src/mat/impls/sell/mpi/mpisell.h> 9 10 EXTERN_C_BEGIN 11 #if defined(PETSC_USE_COMPLEX) 12 #if defined(PETSC_USE_REAL_SINGLE) 13 #include <cmumps_c.h> 14 #else 15 #include <zmumps_c.h> 16 #endif 17 #else 18 #if defined(PETSC_USE_REAL_SINGLE) 19 #include <smumps_c.h> 20 #else 21 #include <dmumps_c.h> 22 #endif 23 #endif 24 EXTERN_C_END 25 #define JOB_INIT -1 26 #define JOB_FACTSYMBOLIC 1 27 #define JOB_FACTNUMERIC 2 28 #define JOB_SOLVE 3 29 #define JOB_END -2 30 31 /* calls to MUMPS */ 32 #if defined(PETSC_USE_COMPLEX) 33 #if defined(PETSC_USE_REAL_SINGLE) 34 #define MUMPS_c cmumps_c 35 #else 36 #define MUMPS_c zmumps_c 37 #endif 38 #else 39 #if defined(PETSC_USE_REAL_SINGLE) 40 #define MUMPS_c smumps_c 41 #else 42 #define MUMPS_c dmumps_c 43 #endif 44 #endif 45 46 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */ 47 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 48 #define PetscMUMPS_c(mumps) \ 49 do { \ 50 if (mumps->use_petsc_omp_support) { \ 51 if (mumps->is_omp_master) { \ 52 ierr = PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl);CHKERRQ(ierr); \ 53 MUMPS_c(&mumps->id); \ 54 ierr = PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl);CHKERRQ(ierr); \ 55 } \ 56 ierr = PetscOmpCtrlBarrier(mumps->omp_ctrl);CHKERRQ(ierr); \ 57 } else { \ 58 MUMPS_c(&mumps->id); \ 59 } \ 60 } while(0) 61 #else 62 #define PetscMUMPS_c(mumps) \ 63 do { MUMPS_c(&mumps->id); } while (0) 64 #endif 65 66 /* declare MumpsScalar */ 67 #if defined(PETSC_USE_COMPLEX) 68 #if defined(PETSC_USE_REAL_SINGLE) 69 #define MumpsScalar mumps_complex 70 #else 71 #define MumpsScalar mumps_double_complex 72 #endif 73 #else 74 #define MumpsScalar PetscScalar 75 #endif 76 77 /* macros s.t. indices match MUMPS documentation */ 78 #define ICNTL(I) icntl[(I)-1] 79 #define CNTL(I) cntl[(I)-1] 80 #define INFOG(I) infog[(I)-1] 81 #define INFO(I) info[(I)-1] 82 #define RINFOG(I) rinfog[(I)-1] 83 #define RINFO(I) rinfo[(I)-1] 84 85 typedef struct { 86 #if defined(PETSC_USE_COMPLEX) 87 #if defined(PETSC_USE_REAL_SINGLE) 88 CMUMPS_STRUC_C id; 89 #else 90 ZMUMPS_STRUC_C id; 91 #endif 92 #else 93 #if defined(PETSC_USE_REAL_SINGLE) 94 SMUMPS_STRUC_C id; 95 #else 96 DMUMPS_STRUC_C id; 97 #endif 98 #endif 99 100 MatStructure matstruc; 101 PetscMPIInt myid,petsc_size; 102 PetscInt *irn,*jcn,nz,sym; 103 PetscScalar *val; 104 MPI_Comm mumps_comm; 105 PetscInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 106 VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 107 Vec b_seq,x_seq; 108 PetscInt ninfo,*info; /* display INFO */ 109 PetscInt sizeredrhs; 110 PetscScalar *schur_sol; 111 PetscInt schur_sizesol; 112 113 PetscBool use_petsc_omp_support; 114 PetscOmpCtrl omp_ctrl; /* an OpenMP controler that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 115 MPI_Comm petsc_comm,omp_comm; /* petsc_comm is petsc matrix's comm */ 116 PetscMPIInt mpinz; /* on master rank, nz = sum(mpinz) over omp_comm; on other ranks, mpinz = nz*/ 117 PetscMPIInt omp_comm_size; 118 PetscBool is_omp_master; /* is this rank the master of omp_comm */ 119 PetscMPIInt *recvcount,*displs; 120 121 PetscErrorCode (*ConvertToTriples)(Mat, int, MatReuse, int*, int**, int**, PetscScalar**); 122 } Mat_MUMPS; 123 124 extern PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*); 125 126 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps) 127 { 128 PetscErrorCode ierr; 129 130 PetscFunctionBegin; 131 ierr = PetscFree2(mumps->id.listvar_schur,mumps->id.schur);CHKERRQ(ierr); 132 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 133 ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr); 134 mumps->id.size_schur = 0; 135 mumps->id.schur_lld = 0; 136 mumps->id.ICNTL(19) = 0; 137 PetscFunctionReturn(0); 138 } 139 140 /* solve with rhs in mumps->id.redrhs and return in the same location */ 141 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 142 { 143 Mat_MUMPS *mumps=(Mat_MUMPS*)F->data; 144 Mat S,B,X; 145 MatFactorSchurStatus schurstatus; 146 PetscInt sizesol; 147 PetscErrorCode ierr; 148 149 PetscFunctionBegin; 150 ierr = MatFactorFactorizeSchurComplement(F);CHKERRQ(ierr); 151 ierr = MatFactorGetSchurComplement(F,&S,&schurstatus);CHKERRQ(ierr); 152 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);CHKERRQ(ierr); 153 switch (schurstatus) { 154 case MAT_FACTOR_SCHUR_FACTORED: 155 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);CHKERRQ(ierr); 156 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 157 ierr = MatMatSolveTranspose(S,B,X);CHKERRQ(ierr); 158 } else { 159 ierr = MatMatSolve(S,B,X);CHKERRQ(ierr); 160 } 161 break; 162 case MAT_FACTOR_SCHUR_INVERTED: 163 sizesol = mumps->id.nrhs*mumps->id.size_schur; 164 if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 165 ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr); 166 ierr = PetscMalloc1(sizesol,&mumps->schur_sol);CHKERRQ(ierr); 167 mumps->schur_sizesol = sizesol; 168 } 169 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);CHKERRQ(ierr); 170 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 171 ierr = MatTransposeMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr); 172 } else { 173 ierr = MatMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr); 174 } 175 ierr = MatCopy(X,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 176 break; 177 default: 178 SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %D",F->schur_status); 179 break; 180 } 181 ierr = MatFactorRestoreSchurComplement(F,&S,schurstatus);CHKERRQ(ierr); 182 ierr = MatDestroy(&B);CHKERRQ(ierr); 183 ierr = MatDestroy(&X);CHKERRQ(ierr); 184 PetscFunctionReturn(0); 185 } 186 187 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 188 { 189 Mat_MUMPS *mumps=(Mat_MUMPS*)F->data; 190 PetscErrorCode ierr; 191 192 PetscFunctionBegin; 193 if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 194 PetscFunctionReturn(0); 195 } 196 if (!expansion) { /* prepare for the condensation step */ 197 PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur; 198 /* allocate MUMPS internal array to store reduced right-hand sides */ 199 if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 200 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 201 mumps->id.lredrhs = mumps->id.size_schur; 202 ierr = PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);CHKERRQ(ierr); 203 mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs; 204 } 205 mumps->id.ICNTL(26) = 1; /* condensation phase */ 206 } else { /* prepare for the expansion step */ 207 /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */ 208 ierr = MatMumpsSolveSchur_Private(F);CHKERRQ(ierr); 209 mumps->id.ICNTL(26) = 2; /* expansion phase */ 210 PetscMUMPS_c(mumps); 211 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 212 /* restore defaults */ 213 mumps->id.ICNTL(26) = -1; 214 /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 215 if (mumps->id.nrhs > 1) { 216 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 217 mumps->id.lredrhs = 0; 218 mumps->sizeredrhs = 0; 219 } 220 } 221 PetscFunctionReturn(0); 222 } 223 224 /* 225 MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz] 226 227 input: 228 A - matrix in aij,baij or sbaij (bs=1) format 229 shift - 0: C style output triple; 1: Fortran style output triple. 230 reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 231 MAT_REUSE_MATRIX: only the values in v array are updated 232 output: 233 nnz - dim of r, c, and v (number of local nonzero entries of A) 234 r, c, v - row and col index, matrix values (matrix triples) 235 236 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 237 freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 238 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 239 240 */ 241 242 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 243 { 244 const PetscInt *ai,*aj,*ajj,M=A->rmap->n; 245 PetscInt nz,rnz,i,j; 246 PetscErrorCode ierr; 247 PetscInt *row,*col; 248 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 249 250 PetscFunctionBegin; 251 *v=aa->a; 252 if (reuse == MAT_INITIAL_MATRIX) { 253 nz = aa->nz; 254 ai = aa->i; 255 aj = aa->j; 256 *nnz = nz; 257 ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr); 258 col = row + nz; 259 260 nz = 0; 261 for (i=0; i<M; i++) { 262 rnz = ai[i+1] - ai[i]; 263 ajj = aj + ai[i]; 264 for (j=0; j<rnz; j++) { 265 row[nz] = i+shift; col[nz++] = ajj[j] + shift; 266 } 267 } 268 *r = row; *c = col; 269 } 270 PetscFunctionReturn(0); 271 } 272 273 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 274 { 275 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 276 PetscInt *ptr; 277 278 PetscFunctionBegin; 279 *v = a->val; 280 if (reuse == MAT_INITIAL_MATRIX) { 281 PetscInt nz,i,j,row; 282 PetscErrorCode ierr; 283 284 nz = a->sliidx[a->totalslices]; 285 *nnz = nz; 286 ierr = PetscMalloc1(2*nz, &ptr);CHKERRQ(ierr); 287 *r = ptr; 288 *c = ptr + nz; 289 290 for (i=0; i<a->totalslices; i++) { 291 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 292 *ptr++ = 8*i + row + shift; 293 } 294 } 295 for (i=0;i<nz;i++) *ptr++ = a->colidx[i] + shift; 296 } 297 PetscFunctionReturn(0); 298 } 299 300 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 301 { 302 Mat_SeqBAIJ *aa=(Mat_SeqBAIJ*)A->data; 303 const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2; 304 PetscInt bs,M,nz,idx=0,rnz,i,j,k,m; 305 PetscErrorCode ierr; 306 PetscInt *row,*col; 307 308 PetscFunctionBegin; 309 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 310 M = A->rmap->N/bs; 311 *v = aa->a; 312 if (reuse == MAT_INITIAL_MATRIX) { 313 ai = aa->i; aj = aa->j; 314 nz = bs2*aa->nz; 315 *nnz = nz; 316 ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr); 317 col = row + nz; 318 319 for (i=0; i<M; i++) { 320 ajj = aj + ai[i]; 321 rnz = ai[i+1] - ai[i]; 322 for (k=0; k<rnz; k++) { 323 for (j=0; j<bs; j++) { 324 for (m=0; m<bs; m++) { 325 row[idx] = i*bs + m + shift; 326 col[idx++] = bs*(ajj[k]) + j + shift; 327 } 328 } 329 } 330 } 331 *r = row; *c = col; 332 } 333 PetscFunctionReturn(0); 334 } 335 336 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 337 { 338 const PetscInt *ai, *aj,*ajj,M=A->rmap->n; 339 PetscInt nz,rnz,i,j; 340 PetscErrorCode ierr; 341 PetscInt *row,*col; 342 Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)A->data; 343 #if defined(PETSC_USE_COMPLEX) 344 PetscBool hermitian; 345 #endif 346 347 PetscFunctionBegin; 348 #if defined(PETSC_USE_COMPLEX) 349 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 350 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 351 #endif 352 *v = aa->a; 353 if (reuse == MAT_INITIAL_MATRIX) { 354 nz = aa->nz; 355 ai = aa->i; 356 aj = aa->j; 357 *v = aa->a; 358 *nnz = nz; 359 ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr); 360 col = row + nz; 361 362 nz = 0; 363 for (i=0; i<M; i++) { 364 rnz = ai[i+1] - ai[i]; 365 ajj = aj + ai[i]; 366 for (j=0; j<rnz; j++) { 367 row[nz] = i+shift; col[nz++] = ajj[j] + shift; 368 } 369 } 370 *r = row; *c = col; 371 } 372 PetscFunctionReturn(0); 373 } 374 375 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 376 { 377 const PetscInt *ai,*aj,*ajj,*adiag,M=A->rmap->n; 378 PetscInt nz,rnz,i,j; 379 const PetscScalar *av,*v1; 380 PetscScalar *val; 381 PetscErrorCode ierr; 382 PetscInt *row,*col; 383 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 384 PetscBool missing; 385 #if defined(PETSC_USE_COMPLEX) 386 PetscBool hermitian; 387 #endif 388 389 PetscFunctionBegin; 390 #if defined(PETSC_USE_COMPLEX) 391 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 392 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 393 #endif 394 ai = aa->i; aj = aa->j; av = aa->a; 395 adiag = aa->diag; 396 ierr = MatMissingDiagonal_SeqAIJ(A,&missing,&i);CHKERRQ(ierr); 397 if (reuse == MAT_INITIAL_MATRIX) { 398 /* count nz in the upper triangular part of A */ 399 nz = 0; 400 if (missing) { 401 for (i=0; i<M; i++) { 402 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 403 for (j=ai[i];j<ai[i+1];j++) { 404 if (aj[j] < i) continue; 405 nz++; 406 } 407 } else { 408 nz += ai[i+1] - adiag[i]; 409 } 410 } 411 } else { 412 for (i=0; i<M; i++) nz += ai[i+1] - adiag[i]; 413 } 414 *nnz = nz; 415 416 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 417 col = row + nz; 418 val = (PetscScalar*)(col + nz); 419 420 nz = 0; 421 if (missing) { 422 for (i=0; i<M; i++) { 423 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 424 for (j=ai[i];j<ai[i+1];j++) { 425 if (aj[j] < i) continue; 426 row[nz] = i+shift; 427 col[nz] = aj[j]+shift; 428 val[nz] = av[j]; 429 nz++; 430 } 431 } else { 432 rnz = ai[i+1] - adiag[i]; 433 ajj = aj + adiag[i]; 434 v1 = av + adiag[i]; 435 for (j=0; j<rnz; j++) { 436 row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j]; 437 } 438 } 439 } 440 } else { 441 for (i=0; i<M; i++) { 442 rnz = ai[i+1] - adiag[i]; 443 ajj = aj + adiag[i]; 444 v1 = av + adiag[i]; 445 for (j=0; j<rnz; j++) { 446 row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j]; 447 } 448 } 449 } 450 *r = row; *c = col; *v = val; 451 } else { 452 nz = 0; val = *v; 453 if (missing) { 454 for (i=0; i <M; i++) { 455 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 456 for (j=ai[i];j<ai[i+1];j++) { 457 if (aj[j] < i) continue; 458 val[nz++] = av[j]; 459 } 460 } else { 461 rnz = ai[i+1] - adiag[i]; 462 v1 = av + adiag[i]; 463 for (j=0; j<rnz; j++) { 464 val[nz++] = v1[j]; 465 } 466 } 467 } 468 } else { 469 for (i=0; i <M; i++) { 470 rnz = ai[i+1] - adiag[i]; 471 v1 = av + adiag[i]; 472 for (j=0; j<rnz; j++) { 473 val[nz++] = v1[j]; 474 } 475 } 476 } 477 } 478 PetscFunctionReturn(0); 479 } 480 481 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 482 { 483 const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 484 PetscErrorCode ierr; 485 PetscInt rstart,nz,i,j,jj,irow,countA,countB; 486 PetscInt *row,*col; 487 const PetscScalar *av, *bv,*v1,*v2; 488 PetscScalar *val; 489 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data; 490 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)(mat->A)->data; 491 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 492 #if defined(PETSC_USE_COMPLEX) 493 PetscBool hermitian; 494 #endif 495 496 PetscFunctionBegin; 497 #if defined(PETSC_USE_COMPLEX) 498 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 499 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 500 #endif 501 rstart = A->rmap->rstart; 502 ai = aa->i; 503 aj = aa->j; 504 bi = bb->i; 505 bj = bb->j; 506 av = aa->a; 507 bv = bb->a; 508 509 garray = mat->garray; 510 511 if (reuse == MAT_INITIAL_MATRIX) { 512 nz = aa->nz + bb->nz; 513 *nnz = nz; 514 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 515 col = row + nz; 516 val = (PetscScalar*)(col + nz); 517 518 *r = row; *c = col; *v = val; 519 } else { 520 row = *r; col = *c; val = *v; 521 } 522 523 jj = 0; irow = rstart; 524 for (i=0; i<m; i++) { 525 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 526 countA = ai[i+1] - ai[i]; 527 countB = bi[i+1] - bi[i]; 528 bjj = bj + bi[i]; 529 v1 = av + ai[i]; 530 v2 = bv + bi[i]; 531 532 /* A-part */ 533 for (j=0; j<countA; j++) { 534 if (reuse == MAT_INITIAL_MATRIX) { 535 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 536 } 537 val[jj++] = v1[j]; 538 } 539 540 /* B-part */ 541 for (j=0; j < countB; j++) { 542 if (reuse == MAT_INITIAL_MATRIX) { 543 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 544 } 545 val[jj++] = v2[j]; 546 } 547 irow++; 548 } 549 PetscFunctionReturn(0); 550 } 551 552 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 553 { 554 const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 555 PetscErrorCode ierr; 556 PetscInt rstart,nz,i,j,jj,irow,countA,countB; 557 PetscInt *row,*col; 558 const PetscScalar *av, *bv,*v1,*v2; 559 PetscScalar *val; 560 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 561 Mat_SeqAIJ *aa = (Mat_SeqAIJ*)(mat->A)->data; 562 Mat_SeqAIJ *bb = (Mat_SeqAIJ*)(mat->B)->data; 563 564 PetscFunctionBegin; 565 rstart = A->rmap->rstart; 566 ai = aa->i; 567 aj = aa->j; 568 bi = bb->i; 569 bj = bb->j; 570 av = aa->a; 571 bv = bb->a; 572 573 garray = mat->garray; 574 575 if (reuse == MAT_INITIAL_MATRIX) { 576 nz = aa->nz + bb->nz; 577 *nnz = nz; 578 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 579 col = row + nz; 580 val = (PetscScalar*)(col + nz); 581 582 *r = row; *c = col; *v = val; 583 } else { 584 row = *r; col = *c; val = *v; 585 } 586 587 jj = 0; irow = rstart; 588 for (i=0; i<m; i++) { 589 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 590 countA = ai[i+1] - ai[i]; 591 countB = bi[i+1] - bi[i]; 592 bjj = bj + bi[i]; 593 v1 = av + ai[i]; 594 v2 = bv + bi[i]; 595 596 /* A-part */ 597 for (j=0; j<countA; j++) { 598 if (reuse == MAT_INITIAL_MATRIX) { 599 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 600 } 601 val[jj++] = v1[j]; 602 } 603 604 /* B-part */ 605 for (j=0; j < countB; j++) { 606 if (reuse == MAT_INITIAL_MATRIX) { 607 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 608 } 609 val[jj++] = v2[j]; 610 } 611 irow++; 612 } 613 PetscFunctionReturn(0); 614 } 615 616 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 617 { 618 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)A->data; 619 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ*)(mat->A)->data; 620 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 621 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj; 622 const PetscInt *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart; 623 const PetscInt bs2=mat->bs2; 624 PetscErrorCode ierr; 625 PetscInt bs,nz,i,j,k,n,jj,irow,countA,countB,idx; 626 PetscInt *row,*col; 627 const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2; 628 PetscScalar *val; 629 630 PetscFunctionBegin; 631 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 632 if (reuse == MAT_INITIAL_MATRIX) { 633 nz = bs2*(aa->nz + bb->nz); 634 *nnz = nz; 635 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 636 col = row + nz; 637 val = (PetscScalar*)(col + nz); 638 639 *r = row; *c = col; *v = val; 640 } else { 641 row = *r; col = *c; val = *v; 642 } 643 644 jj = 0; irow = rstart; 645 for (i=0; i<mbs; i++) { 646 countA = ai[i+1] - ai[i]; 647 countB = bi[i+1] - bi[i]; 648 ajj = aj + ai[i]; 649 bjj = bj + bi[i]; 650 v1 = av + bs2*ai[i]; 651 v2 = bv + bs2*bi[i]; 652 653 idx = 0; 654 /* A-part */ 655 for (k=0; k<countA; k++) { 656 for (j=0; j<bs; j++) { 657 for (n=0; n<bs; n++) { 658 if (reuse == MAT_INITIAL_MATRIX) { 659 row[jj] = irow + n + shift; 660 col[jj] = rstart + bs*ajj[k] + j + shift; 661 } 662 val[jj++] = v1[idx++]; 663 } 664 } 665 } 666 667 idx = 0; 668 /* B-part */ 669 for (k=0; k<countB; k++) { 670 for (j=0; j<bs; j++) { 671 for (n=0; n<bs; n++) { 672 if (reuse == MAT_INITIAL_MATRIX) { 673 row[jj] = irow + n + shift; 674 col[jj] = bs*garray[bjj[k]] + j + shift; 675 } 676 val[jj++] = v2[idx++]; 677 } 678 } 679 } 680 irow += bs; 681 } 682 PetscFunctionReturn(0); 683 } 684 685 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 686 { 687 const PetscInt *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 688 PetscErrorCode ierr; 689 PetscInt rstart,nz,nza,nzb,i,j,jj,irow,countA,countB; 690 PetscInt *row,*col; 691 const PetscScalar *av, *bv,*v1,*v2; 692 PetscScalar *val; 693 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 694 Mat_SeqAIJ *aa =(Mat_SeqAIJ*)(mat->A)->data; 695 Mat_SeqAIJ *bb =(Mat_SeqAIJ*)(mat->B)->data; 696 #if defined(PETSC_USE_COMPLEX) 697 PetscBool hermitian; 698 #endif 699 700 PetscFunctionBegin; 701 #if defined(PETSC_USE_COMPLEX) 702 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 703 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 704 #endif 705 ai = aa->i; 706 aj = aa->j; 707 adiag = aa->diag; 708 bi = bb->i; 709 bj = bb->j; 710 garray = mat->garray; 711 av = aa->a; 712 bv = bb->a; 713 714 rstart = A->rmap->rstart; 715 716 if (reuse == MAT_INITIAL_MATRIX) { 717 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 718 nzb = 0; /* num of upper triangular entries in mat->B */ 719 for (i=0; i<m; i++) { 720 nza += (ai[i+1] - adiag[i]); 721 countB = bi[i+1] - bi[i]; 722 bjj = bj + bi[i]; 723 for (j=0; j<countB; j++) { 724 if (garray[bjj[j]] > rstart) nzb++; 725 } 726 } 727 728 nz = nza + nzb; /* total nz of upper triangular part of mat */ 729 *nnz = nz; 730 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 731 col = row + nz; 732 val = (PetscScalar*)(col + nz); 733 734 *r = row; *c = col; *v = val; 735 } else { 736 row = *r; col = *c; val = *v; 737 } 738 739 jj = 0; irow = rstart; 740 for (i=0; i<m; i++) { 741 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 742 v1 = av + adiag[i]; 743 countA = ai[i+1] - adiag[i]; 744 countB = bi[i+1] - bi[i]; 745 bjj = bj + bi[i]; 746 v2 = bv + bi[i]; 747 748 /* A-part */ 749 for (j=0; j<countA; j++) { 750 if (reuse == MAT_INITIAL_MATRIX) { 751 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 752 } 753 val[jj++] = v1[j]; 754 } 755 756 /* B-part */ 757 for (j=0; j < countB; j++) { 758 if (garray[bjj[j]] > rstart) { 759 if (reuse == MAT_INITIAL_MATRIX) { 760 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 761 } 762 val[jj++] = v2[j]; 763 } 764 } 765 irow++; 766 } 767 PetscFunctionReturn(0); 768 } 769 770 PetscErrorCode MatDestroy_MUMPS(Mat A) 771 { 772 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 773 PetscErrorCode ierr; 774 775 PetscFunctionBegin; 776 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 777 ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr); 778 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 779 ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr); 780 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 781 ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr); 782 ierr = PetscFree(mumps->irn);CHKERRQ(ierr); 783 ierr = PetscFree(mumps->info);CHKERRQ(ierr); 784 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 785 mumps->id.job = JOB_END; 786 PetscMUMPS_c(mumps); 787 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 788 if (mumps->use_petsc_omp_support) { ierr = PetscOmpCtrlDestroy(&mumps->omp_ctrl);CHKERRQ(ierr); } 789 #endif 790 ierr = PetscFree2(mumps->recvcount,mumps->displs);CHKERRQ(ierr); 791 ierr = PetscFree(A->data);CHKERRQ(ierr); 792 793 /* clear composed functions */ 794 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr); 795 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr); 796 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr); 797 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr); 798 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);CHKERRQ(ierr); 799 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr); 800 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);CHKERRQ(ierr); 801 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);CHKERRQ(ierr); 802 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);CHKERRQ(ierr); 803 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);CHKERRQ(ierr); 804 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);CHKERRQ(ierr); 805 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);CHKERRQ(ierr); 806 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);CHKERRQ(ierr); 807 PetscFunctionReturn(0); 808 } 809 810 PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x) 811 { 812 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 813 PetscScalar *array; 814 Vec b_seq; 815 IS is_iden,is_petsc; 816 PetscErrorCode ierr; 817 PetscInt i; 818 PetscBool second_solve = PETSC_FALSE; 819 static PetscBool cite1 = PETSC_FALSE,cite2 = PETSC_FALSE; 820 821 PetscFunctionBegin; 822 ierr = PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1);CHKERRQ(ierr); 823 ierr = PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",&cite2);CHKERRQ(ierr); 824 825 if (A->factorerrortype) { 826 ierr = PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 827 ierr = VecSetInf(x);CHKERRQ(ierr); 828 PetscFunctionReturn(0); 829 } 830 831 mumps->id.ICNTL(20)= 0; /* dense RHS */ 832 mumps->id.nrhs = 1; 833 b_seq = mumps->b_seq; 834 if (mumps->petsc_size > 1) { 835 /* MUMPS only supports centralized rhs. Scatter b into a seqential rhs vector */ 836 ierr = VecScatterBegin(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 837 ierr = VecScatterEnd(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 838 if (!mumps->myid) {ierr = VecGetArray(b_seq,&array);CHKERRQ(ierr);} 839 } else { /* petsc_size == 1 */ 840 ierr = VecCopy(b,x);CHKERRQ(ierr); 841 ierr = VecGetArray(x,&array);CHKERRQ(ierr); 842 } 843 if (!mumps->myid) { /* define rhs on the host */ 844 mumps->id.nrhs = 1; 845 mumps->id.rhs = (MumpsScalar*)array; 846 } 847 848 /* 849 handle condensation step of Schur complement (if any) 850 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 851 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 852 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 853 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 854 */ 855 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 856 if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n"); 857 second_solve = PETSC_TRUE; 858 ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr); 859 } 860 /* solve phase */ 861 /*-------------*/ 862 mumps->id.job = JOB_SOLVE; 863 PetscMUMPS_c(mumps); 864 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 865 866 /* handle expansion step of Schur complement (if any) */ 867 if (second_solve) { 868 ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr); 869 } 870 871 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 872 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 873 /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 874 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 875 } 876 if (!mumps->scat_sol) { /* create scatter scat_sol */ 877 ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */ 878 for (i=0; i<mumps->id.lsol_loc; i++) { 879 mumps->id.isol_loc[i] -= 1; /* change Fortran style to C style */ 880 } 881 ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,mumps->id.isol_loc,PETSC_COPY_VALUES,&is_petsc);CHKERRQ(ierr); /* to */ 882 ierr = VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr); 883 ierr = ISDestroy(&is_iden);CHKERRQ(ierr); 884 ierr = ISDestroy(&is_petsc);CHKERRQ(ierr); 885 886 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 887 } 888 889 ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 890 ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 891 } 892 893 if (mumps->petsc_size > 1) {if (!mumps->myid) {ierr = VecRestoreArray(b_seq,&array);CHKERRQ(ierr);}} 894 else {ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);} 895 896 ierr = PetscLogFlops(2.0*mumps->id.RINFO(3));CHKERRQ(ierr); 897 PetscFunctionReturn(0); 898 } 899 900 PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x) 901 { 902 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 903 PetscErrorCode ierr; 904 905 PetscFunctionBegin; 906 mumps->id.ICNTL(9) = 0; 907 ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr); 908 mumps->id.ICNTL(9) = 1; 909 PetscFunctionReturn(0); 910 } 911 912 PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X) 913 { 914 PetscErrorCode ierr; 915 Mat Bt = NULL; 916 PetscBool flg, flgT; 917 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 918 PetscInt i,nrhs,M; 919 PetscScalar *array; 920 const PetscScalar *rbray; 921 PetscInt lsol_loc,nlsol_loc,*isol_loc,*idxx,*isol_loc_save,iidx = 0; 922 PetscScalar *bray,*sol_loc,*sol_loc_save; 923 IS is_to,is_from; 924 PetscInt k,proc,j,m,myrstart; 925 const PetscInt *rstart; 926 Vec v_mpi,b_seq,msol_loc; 927 VecScatter scat_rhs,scat_sol; 928 PetscScalar *aa; 929 PetscInt spnr,*ia,*ja; 930 Mat_MPIAIJ *b = NULL; 931 932 PetscFunctionBegin; 933 ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 934 if (!flg) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix"); 935 936 ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 937 if (flg) { /* dense B */ 938 if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution"); 939 mumps->id.ICNTL(20)= 0; /* dense RHS */ 940 } else { /* sparse B */ 941 ierr = PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);CHKERRQ(ierr); 942 if (flgT) { /* input B is transpose of actural RHS matrix, 943 because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */ 944 ierr = MatTransposeGetMat(B,&Bt);CHKERRQ(ierr); 945 } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix"); 946 mumps->id.ICNTL(20)= 1; /* sparse RHS */ 947 } 948 949 ierr = MatGetSize(B,&M,&nrhs);CHKERRQ(ierr); 950 mumps->id.nrhs = nrhs; 951 mumps->id.lrhs = M; 952 mumps->id.rhs = NULL; 953 954 if (mumps->petsc_size == 1) { 955 PetscScalar *aa; 956 PetscInt spnr,*ia,*ja; 957 PetscBool second_solve = PETSC_FALSE; 958 959 ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr); 960 mumps->id.rhs = (MumpsScalar*)array; 961 962 if (!Bt) { /* dense B */ 963 /* copy B to X */ 964 ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr); 965 ierr = PetscArraycpy(array,rbray,M*nrhs);CHKERRQ(ierr); 966 ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr); 967 } else { /* sparse B */ 968 ierr = MatSeqAIJGetArray(Bt,&aa);CHKERRQ(ierr); 969 ierr = MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 970 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 971 /* mumps requires ia and ja start at 1! */ 972 mumps->id.irhs_ptr = ia; 973 mumps->id.irhs_sparse = ja; 974 mumps->id.nz_rhs = ia[spnr] - 1; 975 mumps->id.rhs_sparse = (MumpsScalar*)aa; 976 } 977 /* handle condensation step of Schur complement (if any) */ 978 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 979 second_solve = PETSC_TRUE; 980 ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr); 981 } 982 /* solve phase */ 983 /*-------------*/ 984 mumps->id.job = JOB_SOLVE; 985 PetscMUMPS_c(mumps); 986 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 987 988 /* handle expansion step of Schur complement (if any) */ 989 if (second_solve) { 990 ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr); 991 } 992 if (Bt) { /* sparse B */ 993 ierr = MatSeqAIJRestoreArray(Bt,&aa);CHKERRQ(ierr); 994 ierr = MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 995 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 996 } 997 ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr); 998 PetscFunctionReturn(0); 999 } 1000 1001 /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/ 1002 if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n"); 1003 1004 /* create msol_loc to hold mumps local solution */ 1005 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1006 sol_loc_save = (PetscScalar*)mumps->id.sol_loc; 1007 1008 lsol_loc = mumps->id.lsol_loc; 1009 nlsol_loc = nrhs*lsol_loc; /* length of sol_loc */ 1010 ierr = PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);CHKERRQ(ierr); 1011 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1012 mumps->id.isol_loc = isol_loc; 1013 1014 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);CHKERRQ(ierr); 1015 1016 if (!Bt) { /* dense B */ 1017 /* scatter v_mpi to b_seq because MUMPS only supports centralized rhs */ 1018 /* wrap dense rhs matrix B into a vector v_mpi */ 1019 ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr); 1020 ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr); 1021 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr); 1022 ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr); 1023 1024 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */ 1025 if (!mumps->myid) { 1026 PetscInt *idx; 1027 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1028 ierr = PetscMalloc1(nrhs*M,&idx);CHKERRQ(ierr); 1029 ierr = MatGetOwnershipRanges(B,&rstart);CHKERRQ(ierr); 1030 k = 0; 1031 for (proc=0; proc<mumps->petsc_size; proc++){ 1032 for (j=0; j<nrhs; j++){ 1033 for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i; 1034 } 1035 } 1036 1037 ierr = VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);CHKERRQ(ierr); 1038 ierr = ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr); 1039 ierr = ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);CHKERRQ(ierr); 1040 } else { 1041 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);CHKERRQ(ierr); 1042 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);CHKERRQ(ierr); 1043 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);CHKERRQ(ierr); 1044 } 1045 ierr = VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);CHKERRQ(ierr); 1046 ierr = VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1047 ierr = ISDestroy(&is_to);CHKERRQ(ierr); 1048 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1049 ierr = VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1050 1051 if (!mumps->myid) { /* define rhs on the host */ 1052 ierr = VecGetArray(b_seq,&bray);CHKERRQ(ierr); 1053 mumps->id.rhs = (MumpsScalar*)bray; 1054 ierr = VecRestoreArray(b_seq,&bray);CHKERRQ(ierr); 1055 } 1056 1057 } else { /* sparse B */ 1058 b = (Mat_MPIAIJ*)Bt->data; 1059 1060 /* wrap dense X into a vector v_mpi */ 1061 ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr); 1062 ierr = MatDenseGetArray(X,&bray);CHKERRQ(ierr); 1063 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr); 1064 ierr = MatDenseRestoreArray(X,&bray);CHKERRQ(ierr); 1065 1066 if (!mumps->myid) { 1067 ierr = MatSeqAIJGetArray(b->A,&aa);CHKERRQ(ierr); 1068 ierr = MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1069 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 1070 /* mumps requires ia and ja start at 1! */ 1071 mumps->id.irhs_ptr = ia; 1072 mumps->id.irhs_sparse = ja; 1073 mumps->id.nz_rhs = ia[spnr] - 1; 1074 mumps->id.rhs_sparse = (MumpsScalar*)aa; 1075 } else { 1076 mumps->id.irhs_ptr = NULL; 1077 mumps->id.irhs_sparse = NULL; 1078 mumps->id.nz_rhs = 0; 1079 mumps->id.rhs_sparse = NULL; 1080 } 1081 } 1082 1083 /* solve phase */ 1084 /*-------------*/ 1085 mumps->id.job = JOB_SOLVE; 1086 PetscMUMPS_c(mumps); 1087 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1088 1089 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1090 ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr); 1091 ierr = VecPlaceArray(v_mpi,array);CHKERRQ(ierr); 1092 1093 /* create scatter scat_sol */ 1094 ierr = MatGetOwnershipRanges(X,&rstart);CHKERRQ(ierr); 1095 /* iidx: index for scatter mumps solution to petsc X */ 1096 1097 ierr = ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);CHKERRQ(ierr); 1098 ierr = PetscMalloc1(nlsol_loc,&idxx);CHKERRQ(ierr); 1099 for (i=0; i<lsol_loc; i++) { 1100 isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1101 1102 for (proc=0; proc<mumps->petsc_size; proc++){ 1103 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) { 1104 myrstart = rstart[proc]; 1105 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1106 iidx = k + myrstart*nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1107 m = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */ 1108 break; 1109 } 1110 } 1111 1112 for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m; 1113 } 1114 ierr = ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr); 1115 ierr = VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);CHKERRQ(ierr); 1116 ierr = VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1117 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1118 ierr = ISDestroy(&is_to);CHKERRQ(ierr); 1119 ierr = VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1120 ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr); 1121 1122 /* free spaces */ 1123 mumps->id.sol_loc = (MumpsScalar*)sol_loc_save; 1124 mumps->id.isol_loc = isol_loc_save; 1125 1126 ierr = PetscFree2(sol_loc,isol_loc);CHKERRQ(ierr); 1127 ierr = PetscFree(idxx);CHKERRQ(ierr); 1128 ierr = VecDestroy(&msol_loc);CHKERRQ(ierr); 1129 ierr = VecDestroy(&v_mpi);CHKERRQ(ierr); 1130 if (Bt) { 1131 if (!mumps->myid) { 1132 b = (Mat_MPIAIJ*)Bt->data; 1133 ierr = MatSeqAIJRestoreArray(b->A,&aa);CHKERRQ(ierr); 1134 ierr = MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1135 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 1136 } 1137 } else { 1138 ierr = VecDestroy(&b_seq);CHKERRQ(ierr); 1139 ierr = VecScatterDestroy(&scat_rhs);CHKERRQ(ierr); 1140 } 1141 ierr = VecScatterDestroy(&scat_sol);CHKERRQ(ierr); 1142 ierr = PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));CHKERRQ(ierr); 1143 PetscFunctionReturn(0); 1144 } 1145 1146 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X) 1147 { 1148 PetscErrorCode ierr; 1149 PetscBool flg; 1150 Mat B; 1151 1152 PetscFunctionBegin; 1153 ierr = PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 1154 if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix"); 1155 1156 /* Create B=Bt^T that uses Bt's data structure */ 1157 ierr = MatCreateTranspose(Bt,&B);CHKERRQ(ierr); 1158 1159 ierr = MatMatSolve_MUMPS(A,B,X);CHKERRQ(ierr); 1160 ierr = MatDestroy(&B);CHKERRQ(ierr); 1161 PetscFunctionReturn(0); 1162 } 1163 1164 #if !defined(PETSC_USE_COMPLEX) 1165 /* 1166 input: 1167 F: numeric factor 1168 output: 1169 nneg: total number of negative pivots 1170 nzero: total number of zero pivots 1171 npos: (global dimension of F) - nneg - nzero 1172 */ 1173 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos) 1174 { 1175 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1176 PetscErrorCode ierr; 1177 PetscMPIInt size; 1178 1179 PetscFunctionBegin; 1180 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRQ(ierr); 1181 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1182 if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",mumps->id.INFOG(13)); 1183 1184 if (nneg) *nneg = mumps->id.INFOG(12); 1185 if (nzero || npos) { 1186 if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1187 if (nzero) *nzero = mumps->id.INFOG(28); 1188 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1189 } 1190 PetscFunctionReturn(0); 1191 } 1192 #endif 1193 1194 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps) 1195 { 1196 PetscErrorCode ierr; 1197 PetscInt i,nz=0,*irn,*jcn=0; 1198 PetscScalar *val=0; 1199 PetscMPIInt mpinz,*recvcount=NULL,*displs=NULL; 1200 1201 PetscFunctionBegin; 1202 if (mumps->omp_comm_size > 1) { 1203 if (reuse == MAT_INITIAL_MATRIX) { 1204 /* master first gathers counts of nonzeros to receive */ 1205 if (mumps->is_omp_master) { ierr = PetscMalloc2(mumps->omp_comm_size,&recvcount,mumps->omp_comm_size,&displs);CHKERRQ(ierr); } 1206 ierr = PetscMPIIntCast(mumps->nz,&mpinz);CHKERRQ(ierr); 1207 ierr = MPI_Gather(&mpinz,1,MPI_INT,recvcount,1,MPI_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1208 1209 /* master allocates memory to receive nonzeros */ 1210 if (mumps->is_omp_master) { 1211 displs[0] = 0; 1212 for (i=1; i<mumps->omp_comm_size; i++) displs[i] = displs[i-1] + recvcount[i-1]; 1213 nz = displs[mumps->omp_comm_size-1] + recvcount[mumps->omp_comm_size-1]; 1214 ierr = PetscMalloc(2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar),&irn);CHKERRQ(ierr); 1215 jcn = irn + nz; 1216 val = (PetscScalar*)(jcn + nz); 1217 } 1218 1219 /* save the gatherv plan */ 1220 mumps->mpinz = mpinz; /* used as send count */ 1221 mumps->recvcount = recvcount; 1222 mumps->displs = displs; 1223 1224 /* master gathers nonzeros */ 1225 ierr = MPI_Gatherv(mumps->irn,mpinz,MPIU_INT,irn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1226 ierr = MPI_Gatherv(mumps->jcn,mpinz,MPIU_INT,jcn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1227 ierr = MPI_Gatherv(mumps->val,mpinz,MPIU_SCALAR,val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1228 1229 /* master frees its row/col/val and replaces them with bigger arrays */ 1230 if (mumps->is_omp_master) { 1231 ierr = PetscFree(mumps->irn);CHKERRQ(ierr); /* irn/jcn/val are allocated together so free only irn */ 1232 mumps->nz = nz; /* it is a sum of mpinz over omp_comm */ 1233 mumps->irn = irn; 1234 mumps->jcn = jcn; 1235 mumps->val = val; 1236 } 1237 } else { 1238 ierr = MPI_Gatherv((mumps->is_omp_master?MPI_IN_PLACE:mumps->val),mumps->mpinz,MPIU_SCALAR,mumps->val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1239 } 1240 } 1241 PetscFunctionReturn(0); 1242 } 1243 1244 PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info) 1245 { 1246 Mat_MUMPS *mumps =(Mat_MUMPS*)(F)->data; 1247 PetscErrorCode ierr; 1248 PetscBool isMPIAIJ; 1249 1250 PetscFunctionBegin; 1251 if (mumps->id.INFOG(1) < 0) { 1252 if (mumps->id.INFOG(1) == -6) { 1253 ierr = PetscInfo2(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1254 } 1255 ierr = PetscInfo2(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1256 PetscFunctionReturn(0); 1257 } 1258 1259 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1260 ierr = MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);CHKERRQ(ierr); 1261 1262 /* numerical factorization phase */ 1263 /*-------------------------------*/ 1264 mumps->id.job = JOB_FACTNUMERIC; 1265 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1266 if (!mumps->myid) { 1267 mumps->id.a = (MumpsScalar*)mumps->val; 1268 } 1269 } else { 1270 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1271 } 1272 PetscMUMPS_c(mumps); 1273 if (mumps->id.INFOG(1) < 0) { 1274 if (A->erroriffailure) { 1275 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 1276 } else { 1277 if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */ 1278 ierr = PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1279 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1280 } else if (mumps->id.INFOG(1) == -13) { 1281 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1282 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1283 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10) ) { 1284 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray \n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1285 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1286 } else { 1287 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1288 F->factorerrortype = MAT_FACTOR_OTHER; 1289 } 1290 } 1291 } 1292 if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d\n",mumps->id.INFOG(16)); 1293 1294 F->assembled = PETSC_TRUE; 1295 mumps->matstruc = SAME_NONZERO_PATTERN; 1296 if (F->schur) { /* reset Schur status to unfactored */ 1297 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1298 mumps->id.ICNTL(19) = 2; 1299 ierr = MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);CHKERRQ(ierr); 1300 } 1301 ierr = MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);CHKERRQ(ierr); 1302 } 1303 1304 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 1305 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 1306 1307 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 1308 if (mumps->petsc_size > 1) { 1309 PetscInt lsol_loc; 1310 PetscScalar *sol_loc; 1311 1312 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr); 1313 1314 /* distributed solution; Create x_seq=sol_loc for repeated use */ 1315 if (mumps->x_seq) { 1316 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 1317 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 1318 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 1319 } 1320 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1321 ierr = PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);CHKERRQ(ierr); 1322 mumps->id.lsol_loc = lsol_loc; 1323 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1324 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr); 1325 } 1326 ierr = PetscLogFlops(mumps->id.RINFO(2));CHKERRQ(ierr); 1327 PetscFunctionReturn(0); 1328 } 1329 1330 /* Sets MUMPS options from the options database */ 1331 PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A) 1332 { 1333 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1334 PetscErrorCode ierr; 1335 PetscInt icntl,info[80],i,ninfo=80; 1336 PetscBool flg; 1337 1338 PetscFunctionBegin; 1339 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 1340 ierr = PetscOptionsInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr); 1341 if (flg) mumps->id.ICNTL(1) = icntl; 1342 ierr = PetscOptionsInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr); 1343 if (flg) mumps->id.ICNTL(2) = icntl; 1344 ierr = PetscOptionsInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr); 1345 if (flg) mumps->id.ICNTL(3) = icntl; 1346 1347 ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr); 1348 if (flg) mumps->id.ICNTL(4) = icntl; 1349 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 1350 1351 ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr); 1352 if (flg) mumps->id.ICNTL(6) = icntl; 1353 1354 ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr); 1355 if (flg) { 1356 if (icntl== 1 && mumps->petsc_size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n"); 1357 else mumps->id.ICNTL(7) = icntl; 1358 } 1359 1360 ierr = PetscOptionsInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr); 1361 /* ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL);CHKERRQ(ierr); handled by MatSolveTranspose_MUMPS() */ 1362 ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr); 1363 ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr); 1364 ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr); 1365 ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr); 1366 ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr); 1367 ierr = PetscOptionsInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr); 1368 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 1369 ierr = MatDestroy(&F->schur);CHKERRQ(ierr); 1370 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 1371 } 1372 /* ierr = PetscOptionsInt("-mat_mumps_icntl_20","ICNTL(20): the format (dense or sparse) of the right-hand sides","None",mumps->id.ICNTL(20),&mumps->id.ICNTL(20),NULL);CHKERRQ(ierr); -- sparse rhs is not supported in PETSc API */ 1373 /* ierr = PetscOptionsInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL);CHKERRQ(ierr); we only use distributed solution vector */ 1374 1375 ierr = PetscOptionsInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr); 1376 ierr = PetscOptionsInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr); 1377 ierr = PetscOptionsInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr); 1378 if (mumps->id.ICNTL(24)) { 1379 mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ 1380 } 1381 1382 ierr = PetscOptionsInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr); 1383 ierr = PetscOptionsInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr); 1384 ierr = PetscOptionsInt("-mat_mumps_icntl_27","ICNTL(27): the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr); 1385 ierr = PetscOptionsInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr); 1386 ierr = PetscOptionsInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr); 1387 /* ierr = PetscOptionsInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */ 1388 ierr = PetscOptionsInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr); 1389 /* ierr = PetscOptionsInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr); -- not supported by PETSc API */ 1390 ierr = PetscOptionsInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr); 1391 ierr = PetscOptionsInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr); 1392 ierr = PetscOptionsInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);CHKERRQ(ierr); 1393 ierr = PetscOptionsInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);CHKERRQ(ierr); 1394 1395 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr); 1396 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr); 1397 ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr); 1398 ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr); 1399 ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr); 1400 ierr = PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);CHKERRQ(ierr); 1401 1402 ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, 256, NULL);CHKERRQ(ierr); 1403 1404 ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr); 1405 if (ninfo) { 1406 if (ninfo > 80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %d must <= 80\n",ninfo); 1407 ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr); 1408 mumps->ninfo = ninfo; 1409 for (i=0; i<ninfo; i++) { 1410 if (info[i] < 0 || info[i]>80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %d must between 1 and 80\n",ninfo); 1411 else mumps->info[i] = info[i]; 1412 } 1413 } 1414 1415 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1416 PetscFunctionReturn(0); 1417 } 1418 1419 PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps) 1420 { 1421 PetscErrorCode ierr; 1422 PetscInt nthreads=0; 1423 1424 PetscFunctionBegin; 1425 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1426 ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRQ(ierr); 1427 ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRQ(ierr); /* so that code like "if (!myid)" still works even if mumps_comm is different */ 1428 1429 ierr = PetscOptionsHasName(NULL,NULL,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr); 1430 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1431 ierr = PetscOptionsGetInt(NULL,NULL,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr); 1432 if (mumps->use_petsc_omp_support) { 1433 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1434 ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr); 1435 ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr); 1436 #else 1437 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -mat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual\n"); 1438 #endif 1439 } else { 1440 mumps->omp_comm = PETSC_COMM_SELF; 1441 mumps->mumps_comm = mumps->petsc_comm; 1442 mumps->is_omp_master = PETSC_TRUE; 1443 } 1444 ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRQ(ierr); 1445 1446 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 1447 mumps->id.job = JOB_INIT; 1448 mumps->id.par = 1; /* host participates factorizaton and solve */ 1449 mumps->id.sym = mumps->sym; 1450 1451 PetscMUMPS_c(mumps); 1452 1453 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 1454 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 1455 */ 1456 ierr = MPI_Bcast(mumps->id.icntl,60,MPIU_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 Manual Section 9 */ 1457 ierr = MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); 1458 1459 mumps->scat_rhs = NULL; 1460 mumps->scat_sol = NULL; 1461 1462 /* set PETSc-MUMPS default options - override MUMPS default */ 1463 mumps->id.ICNTL(3) = 0; 1464 mumps->id.ICNTL(4) = 0; 1465 if (mumps->petsc_size == 1) { 1466 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 1467 } else { 1468 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 1469 mumps->id.ICNTL(20) = 0; /* rhs is in dense format */ 1470 mumps->id.ICNTL(21) = 1; /* distributed solution */ 1471 } 1472 1473 /* schur */ 1474 mumps->id.size_schur = 0; 1475 mumps->id.listvar_schur = NULL; 1476 mumps->id.schur = NULL; 1477 mumps->sizeredrhs = 0; 1478 mumps->schur_sol = NULL; 1479 mumps->schur_sizesol = 0; 1480 PetscFunctionReturn(0); 1481 } 1482 1483 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps) 1484 { 1485 PetscErrorCode ierr; 1486 1487 PetscFunctionBegin; 1488 ierr = MPI_Bcast(mumps->id.infog, 80,MPIU_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 manual p82 */ 1489 ierr = MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); 1490 if (mumps->id.INFOG(1) < 0) { 1491 if (A->erroriffailure) { 1492 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1493 } else { 1494 if (mumps->id.INFOG(1) == -6) { 1495 ierr = PetscInfo2(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1496 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 1497 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 1498 ierr = PetscInfo2(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1499 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1500 } else { 1501 ierr = PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1502 F->factorerrortype = MAT_FACTOR_OTHER; 1503 } 1504 } 1505 } 1506 PetscFunctionReturn(0); 1507 } 1508 1509 /* Note Petsc r(=c) permutation is used when mumps->id.ICNTL(7)==1 with centralized assembled matrix input; otherwise r and c are ignored */ 1510 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1511 { 1512 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1513 PetscErrorCode ierr; 1514 Vec b; 1515 IS is_iden; 1516 const PetscInt M = A->rmap->N; 1517 1518 PetscFunctionBegin; 1519 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1520 1521 /* Set MUMPS options from the options database */ 1522 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1523 1524 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1525 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1526 1527 /* analysis phase */ 1528 /*----------------*/ 1529 mumps->id.job = JOB_FACTSYMBOLIC; 1530 mumps->id.n = M; 1531 switch (mumps->id.ICNTL(18)) { 1532 case 0: /* centralized assembled matrix input */ 1533 if (!mumps->myid) { 1534 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1535 if (mumps->id.ICNTL(6)>1) { 1536 mumps->id.a = (MumpsScalar*)mumps->val; 1537 } 1538 if (mumps->id.ICNTL(7) == 1) { /* use user-provide matrix ordering - assuming r = c ordering */ 1539 /* 1540 PetscBool flag; 1541 ierr = ISEqual(r,c,&flag);CHKERRQ(ierr); 1542 if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"row_perm != col_perm"); 1543 ierr = ISView(r,PETSC_VIEWER_STDOUT_SELF); 1544 */ 1545 if (!mumps->myid) { 1546 const PetscInt *idx; 1547 PetscInt i,*perm_in; 1548 1549 ierr = PetscMalloc1(M,&perm_in);CHKERRQ(ierr); 1550 ierr = ISGetIndices(r,&idx);CHKERRQ(ierr); 1551 1552 mumps->id.perm_in = perm_in; 1553 for (i=0; i<M; i++) perm_in[i] = idx[i]+1; /* perm_in[]: start from 1, not 0! */ 1554 ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr); 1555 } 1556 } 1557 } 1558 break; 1559 case 3: /* distributed assembled matrix input (size>1) */ 1560 mumps->id.nz_loc = mumps->nz; 1561 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1562 if (mumps->id.ICNTL(6)>1) { 1563 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1564 } 1565 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1566 if (!mumps->myid) { 1567 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->N,&mumps->b_seq);CHKERRQ(ierr); 1568 ierr = ISCreateStride(PETSC_COMM_SELF,A->rmap->N,0,1,&is_iden);CHKERRQ(ierr); 1569 } else { 1570 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&mumps->b_seq);CHKERRQ(ierr); 1571 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_iden);CHKERRQ(ierr); 1572 } 1573 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1574 ierr = VecScatterCreate(b,is_iden,mumps->b_seq,is_iden,&mumps->scat_rhs);CHKERRQ(ierr); 1575 ierr = ISDestroy(&is_iden);CHKERRQ(ierr); 1576 ierr = VecDestroy(&b);CHKERRQ(ierr); 1577 break; 1578 } 1579 PetscMUMPS_c(mumps); 1580 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1581 1582 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1583 F->ops->solve = MatSolve_MUMPS; 1584 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1585 F->ops->matsolve = MatMatSolve_MUMPS; 1586 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1587 PetscFunctionReturn(0); 1588 } 1589 1590 /* Note the Petsc r and c permutations are ignored */ 1591 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1592 { 1593 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1594 PetscErrorCode ierr; 1595 Vec b; 1596 IS is_iden; 1597 const PetscInt M = A->rmap->N; 1598 1599 PetscFunctionBegin; 1600 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1601 1602 /* Set MUMPS options from the options database */ 1603 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1604 1605 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1606 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1607 1608 /* analysis phase */ 1609 /*----------------*/ 1610 mumps->id.job = JOB_FACTSYMBOLIC; 1611 mumps->id.n = M; 1612 switch (mumps->id.ICNTL(18)) { 1613 case 0: /* centralized assembled matrix input */ 1614 if (!mumps->myid) { 1615 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1616 if (mumps->id.ICNTL(6)>1) { 1617 mumps->id.a = (MumpsScalar*)mumps->val; 1618 } 1619 } 1620 break; 1621 case 3: /* distributed assembled matrix input (size>1) */ 1622 mumps->id.nz_loc = mumps->nz; 1623 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1624 if (mumps->id.ICNTL(6)>1) { 1625 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1626 } 1627 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1628 if (!mumps->myid) { 1629 ierr = VecCreateSeq(PETSC_COMM_SELF,A->cmap->N,&mumps->b_seq);CHKERRQ(ierr); 1630 ierr = ISCreateStride(PETSC_COMM_SELF,A->cmap->N,0,1,&is_iden);CHKERRQ(ierr); 1631 } else { 1632 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&mumps->b_seq);CHKERRQ(ierr); 1633 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_iden);CHKERRQ(ierr); 1634 } 1635 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1636 ierr = VecScatterCreate(b,is_iden,mumps->b_seq,is_iden,&mumps->scat_rhs);CHKERRQ(ierr); 1637 ierr = ISDestroy(&is_iden);CHKERRQ(ierr); 1638 ierr = VecDestroy(&b);CHKERRQ(ierr); 1639 break; 1640 } 1641 PetscMUMPS_c(mumps); 1642 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1643 1644 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1645 F->ops->solve = MatSolve_MUMPS; 1646 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1647 PetscFunctionReturn(0); 1648 } 1649 1650 /* Note the Petsc r permutation and factor info are ignored */ 1651 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info) 1652 { 1653 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1654 PetscErrorCode ierr; 1655 Vec b; 1656 IS is_iden; 1657 const PetscInt M = A->rmap->N; 1658 1659 PetscFunctionBegin; 1660 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1661 1662 /* Set MUMPS options from the options database */ 1663 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1664 1665 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1666 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1667 1668 /* analysis phase */ 1669 /*----------------*/ 1670 mumps->id.job = JOB_FACTSYMBOLIC; 1671 mumps->id.n = M; 1672 switch (mumps->id.ICNTL(18)) { 1673 case 0: /* centralized assembled matrix input */ 1674 if (!mumps->myid) { 1675 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1676 if (mumps->id.ICNTL(6)>1) { 1677 mumps->id.a = (MumpsScalar*)mumps->val; 1678 } 1679 } 1680 break; 1681 case 3: /* distributed assembled matrix input (size>1) */ 1682 mumps->id.nz_loc = mumps->nz; 1683 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1684 if (mumps->id.ICNTL(6)>1) { 1685 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1686 } 1687 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1688 if (!mumps->myid) { 1689 ierr = VecCreateSeq(PETSC_COMM_SELF,A->cmap->N,&mumps->b_seq);CHKERRQ(ierr); 1690 ierr = ISCreateStride(PETSC_COMM_SELF,A->cmap->N,0,1,&is_iden);CHKERRQ(ierr); 1691 } else { 1692 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&mumps->b_seq);CHKERRQ(ierr); 1693 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_iden);CHKERRQ(ierr); 1694 } 1695 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1696 ierr = VecScatterCreate(b,is_iden,mumps->b_seq,is_iden,&mumps->scat_rhs);CHKERRQ(ierr); 1697 ierr = ISDestroy(&is_iden);CHKERRQ(ierr); 1698 ierr = VecDestroy(&b);CHKERRQ(ierr); 1699 break; 1700 } 1701 PetscMUMPS_c(mumps); 1702 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1703 1704 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 1705 F->ops->solve = MatSolve_MUMPS; 1706 F->ops->solvetranspose = MatSolve_MUMPS; 1707 F->ops->matsolve = MatMatSolve_MUMPS; 1708 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1709 #if defined(PETSC_USE_COMPLEX) 1710 F->ops->getinertia = NULL; 1711 #else 1712 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 1713 #endif 1714 PetscFunctionReturn(0); 1715 } 1716 1717 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) 1718 { 1719 PetscErrorCode ierr; 1720 PetscBool iascii; 1721 PetscViewerFormat format; 1722 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1723 1724 PetscFunctionBegin; 1725 /* check if matrix is mumps type */ 1726 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0); 1727 1728 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1729 if (iascii) { 1730 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1731 if (format == PETSC_VIEWER_ASCII_INFO) { 1732 ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr); 1733 ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",mumps->id.sym);CHKERRQ(ierr); 1734 ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",mumps->id.par);CHKERRQ(ierr); 1735 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d \n",mumps->id.ICNTL(1));CHKERRQ(ierr); 1736 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d \n",mumps->id.ICNTL(2));CHKERRQ(ierr); 1737 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d \n",mumps->id.ICNTL(3));CHKERRQ(ierr); 1738 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",mumps->id.ICNTL(4));CHKERRQ(ierr); 1739 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",mumps->id.ICNTL(5));CHKERRQ(ierr); 1740 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",mumps->id.ICNTL(6));CHKERRQ(ierr); 1741 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d \n",mumps->id.ICNTL(7));CHKERRQ(ierr); 1742 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d \n",mumps->id.ICNTL(8));CHKERRQ(ierr); 1743 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",mumps->id.ICNTL(10));CHKERRQ(ierr); 1744 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",mumps->id.ICNTL(11));CHKERRQ(ierr); 1745 if (mumps->id.ICNTL(11)>0) { 1746 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr); 1747 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr); 1748 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6));CHKERRQ(ierr); 1749 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr); 1750 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr); 1751 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr); 1752 } 1753 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",mumps->id.ICNTL(12));CHKERRQ(ierr); 1754 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (efficiency control): %d \n",mumps->id.ICNTL(13));CHKERRQ(ierr); 1755 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",mumps->id.ICNTL(14));CHKERRQ(ierr); 1756 /* ICNTL(15-17) not used */ 1757 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",mumps->id.ICNTL(18));CHKERRQ(ierr); 1758 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d \n",mumps->id.ICNTL(19));CHKERRQ(ierr); 1759 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(20) (rhs sparse pattern): %d \n",mumps->id.ICNTL(20));CHKERRQ(ierr); 1760 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d \n",mumps->id.ICNTL(21));CHKERRQ(ierr); 1761 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d \n",mumps->id.ICNTL(22));CHKERRQ(ierr); 1762 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory can be allocated locally):%d \n",mumps->id.ICNTL(23));CHKERRQ(ierr); 1763 1764 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d \n",mumps->id.ICNTL(24));CHKERRQ(ierr); 1765 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d \n",mumps->id.ICNTL(25));CHKERRQ(ierr); 1766 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for rhs or solution): %d \n",mumps->id.ICNTL(26));CHKERRQ(ierr); 1767 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(27) (experimental parameter): %d \n",mumps->id.ICNTL(27));CHKERRQ(ierr); 1768 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d \n",mumps->id.ICNTL(28));CHKERRQ(ierr); 1769 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d \n",mumps->id.ICNTL(29));CHKERRQ(ierr); 1770 1771 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d \n",mumps->id.ICNTL(30));CHKERRQ(ierr); 1772 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(31) (factors is discarded in the solve phase): %d \n",mumps->id.ICNTL(31));CHKERRQ(ierr); 1773 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d \n",mumps->id.ICNTL(33));CHKERRQ(ierr); 1774 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d \n",mumps->id.ICNTL(35));CHKERRQ(ierr); 1775 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(36) (choice of BLR factorization variant): %d \n",mumps->id.ICNTL(36));CHKERRQ(ierr); 1776 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(38) (estimated compression rate of LU factors): %d \n",mumps->id.ICNTL(38));CHKERRQ(ierr); 1777 1778 ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1));CHKERRQ(ierr); 1779 ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr); 1780 ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3));CHKERRQ(ierr); 1781 ierr = PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4));CHKERRQ(ierr); 1782 ierr = PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5));CHKERRQ(ierr); 1783 ierr = PetscViewerASCIIPrintf(viewer," CNTL(7) (dropping parameter for BLR): %g \n",mumps->id.CNTL(7));CHKERRQ(ierr); 1784 1785 /* infomation local to each processor */ 1786 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr); 1787 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1788 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr); 1789 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1790 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr); 1791 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr); 1792 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1793 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr); 1794 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr); 1795 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1796 1797 ierr = PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr); 1798 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr); 1799 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1800 1801 ierr = PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr); 1802 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr); 1803 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1804 1805 ierr = PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr); 1806 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr); 1807 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1808 1809 if (mumps->ninfo && mumps->ninfo <= 80){ 1810 PetscInt i; 1811 for (i=0; i<mumps->ninfo; i++){ 1812 ierr = PetscViewerASCIIPrintf(viewer, " INFO(%d): \n",mumps->info[i]);CHKERRQ(ierr); 1813 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(mumps->info[i]));CHKERRQ(ierr); 1814 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1815 } 1816 } 1817 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1818 1819 if (!mumps->myid) { /* information from the host */ 1820 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr); 1821 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr); 1822 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr); 1823 ierr = PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr); 1824 1825 ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(3));CHKERRQ(ierr); 1826 ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(4));CHKERRQ(ierr); 1827 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",mumps->id.INFOG(5));CHKERRQ(ierr); 1828 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",mumps->id.INFOG(6));CHKERRQ(ierr); 1829 ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively use after analysis): %d \n",mumps->id.INFOG(7));CHKERRQ(ierr); 1830 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",mumps->id.INFOG(8));CHKERRQ(ierr); 1831 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",mumps->id.INFOG(9));CHKERRQ(ierr); 1832 ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d \n",mumps->id.INFOG(10));CHKERRQ(ierr); 1833 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d \n",mumps->id.INFOG(11));CHKERRQ(ierr); 1834 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",mumps->id.INFOG(12));CHKERRQ(ierr); 1835 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",mumps->id.INFOG(13));CHKERRQ(ierr); 1836 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",mumps->id.INFOG(14));CHKERRQ(ierr); 1837 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",mumps->id.INFOG(15));CHKERRQ(ierr); 1838 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",mumps->id.INFOG(16));CHKERRQ(ierr); 1839 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",mumps->id.INFOG(17));CHKERRQ(ierr); 1840 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",mumps->id.INFOG(18));CHKERRQ(ierr); 1841 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",mumps->id.INFOG(19));CHKERRQ(ierr); 1842 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",mumps->id.INFOG(20));CHKERRQ(ierr); 1843 ierr = PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",mumps->id.INFOG(21));CHKERRQ(ierr); 1844 ierr = PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",mumps->id.INFOG(22));CHKERRQ(ierr); 1845 ierr = PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",mumps->id.INFOG(23));CHKERRQ(ierr); 1846 ierr = PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",mumps->id.INFOG(24));CHKERRQ(ierr); 1847 ierr = PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",mumps->id.INFOG(25));CHKERRQ(ierr); 1848 ierr = PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));CHKERRQ(ierr); 1849 ierr = PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));CHKERRQ(ierr); 1850 ierr = PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));CHKERRQ(ierr); 1851 ierr = PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));CHKERRQ(ierr); 1852 ierr = PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));CHKERRQ(ierr); 1853 ierr = PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));CHKERRQ(ierr); 1854 ierr = PetscViewerASCIIPrintf(viewer," INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));CHKERRQ(ierr); 1855 ierr = PetscViewerASCIIPrintf(viewer," INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(36));CHKERRQ(ierr); 1856 ierr = PetscViewerASCIIPrintf(viewer," INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d \n",mumps->id.INFOG(37));CHKERRQ(ierr); 1857 ierr = PetscViewerASCIIPrintf(viewer," INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(38));CHKERRQ(ierr); 1858 ierr = PetscViewerASCIIPrintf(viewer," INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d \n",mumps->id.INFOG(39));CHKERRQ(ierr); 1859 } 1860 } 1861 } 1862 PetscFunctionReturn(0); 1863 } 1864 1865 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info) 1866 { 1867 Mat_MUMPS *mumps =(Mat_MUMPS*)A->data; 1868 1869 PetscFunctionBegin; 1870 info->block_size = 1.0; 1871 info->nz_allocated = mumps->id.INFOG(20); 1872 info->nz_used = mumps->id.INFOG(20); 1873 info->nz_unneeded = 0.0; 1874 info->assemblies = 0.0; 1875 info->mallocs = 0.0; 1876 info->memory = 0.0; 1877 info->fill_ratio_given = 0; 1878 info->fill_ratio_needed = 0; 1879 info->factor_mallocs = 0; 1880 PetscFunctionReturn(0); 1881 } 1882 1883 /* -------------------------------------------------------------------------------------------*/ 1884 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 1885 { 1886 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1887 const PetscInt *idxs; 1888 PetscInt size,i; 1889 PetscErrorCode ierr; 1890 1891 PetscFunctionBegin; 1892 ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr); 1893 if (mumps->petsc_size > 1) { 1894 PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */ 1895 1896 ls = mumps->myid ? (size ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 1897 ierr = MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);CHKERRQ(ierr); 1898 if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc\n"); 1899 } 1900 if (mumps->id.size_schur != size) { 1901 ierr = PetscFree2(mumps->id.listvar_schur,mumps->id.schur);CHKERRQ(ierr); 1902 mumps->id.size_schur = size; 1903 mumps->id.schur_lld = size; 1904 ierr = PetscMalloc2(size,&mumps->id.listvar_schur,size*size,&mumps->id.schur);CHKERRQ(ierr); 1905 } 1906 1907 /* Schur complement matrix */ 1908 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.size_schur,(PetscScalar*)mumps->id.schur,&F->schur);CHKERRQ(ierr); 1909 if (mumps->sym == 1) { 1910 ierr = MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 1911 } 1912 1913 /* MUMPS expects Fortran style indices */ 1914 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 1915 ierr = PetscArraycpy(mumps->id.listvar_schur,idxs,size);CHKERRQ(ierr); 1916 for (i=0;i<size;i++) mumps->id.listvar_schur[i]++; 1917 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 1918 if (mumps->petsc_size > 1) { 1919 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 1920 } else { 1921 if (F->factortype == MAT_FACTOR_LU) { 1922 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 1923 } else { 1924 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 1925 } 1926 } 1927 /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 1928 mumps->id.ICNTL(26) = -1; 1929 PetscFunctionReturn(0); 1930 } 1931 1932 /* -------------------------------------------------------------------------------------------*/ 1933 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S) 1934 { 1935 Mat St; 1936 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1937 PetscScalar *array; 1938 #if defined(PETSC_USE_COMPLEX) 1939 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 1940 #endif 1941 PetscErrorCode ierr; 1942 1943 PetscFunctionBegin; 1944 if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 1945 ierr = MatCreate(PETSC_COMM_SELF,&St);CHKERRQ(ierr); 1946 ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);CHKERRQ(ierr); 1947 ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr); 1948 ierr = MatSetUp(St);CHKERRQ(ierr); 1949 ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr); 1950 if (!mumps->sym) { /* MUMPS always return a full matrix */ 1951 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1952 PetscInt i,j,N=mumps->id.size_schur; 1953 for (i=0;i<N;i++) { 1954 for (j=0;j<N;j++) { 1955 #if !defined(PETSC_USE_COMPLEX) 1956 PetscScalar val = mumps->id.schur[i*N+j]; 1957 #else 1958 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 1959 #endif 1960 array[j*N+i] = val; 1961 } 1962 } 1963 } else { /* stored by columns */ 1964 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 1965 } 1966 } else { /* either full or lower-triangular (not packed) */ 1967 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 1968 PetscInt i,j,N=mumps->id.size_schur; 1969 for (i=0;i<N;i++) { 1970 for (j=i;j<N;j++) { 1971 #if !defined(PETSC_USE_COMPLEX) 1972 PetscScalar val = mumps->id.schur[i*N+j]; 1973 #else 1974 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 1975 #endif 1976 array[i*N+j] = val; 1977 array[j*N+i] = val; 1978 } 1979 } 1980 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 1981 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 1982 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 1983 PetscInt i,j,N=mumps->id.size_schur; 1984 for (i=0;i<N;i++) { 1985 for (j=0;j<i+1;j++) { 1986 #if !defined(PETSC_USE_COMPLEX) 1987 PetscScalar val = mumps->id.schur[i*N+j]; 1988 #else 1989 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 1990 #endif 1991 array[i*N+j] = val; 1992 array[j*N+i] = val; 1993 } 1994 } 1995 } 1996 } 1997 ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr); 1998 *S = St; 1999 PetscFunctionReturn(0); 2000 } 2001 2002 /* -------------------------------------------------------------------------------------------*/ 2003 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival) 2004 { 2005 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2006 2007 PetscFunctionBegin; 2008 mumps->id.ICNTL(icntl) = ival; 2009 PetscFunctionReturn(0); 2010 } 2011 2012 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival) 2013 { 2014 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2015 2016 PetscFunctionBegin; 2017 *ival = mumps->id.ICNTL(icntl); 2018 PetscFunctionReturn(0); 2019 } 2020 2021 /*@ 2022 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2023 2024 Logically Collective on Mat 2025 2026 Input Parameters: 2027 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2028 . icntl - index of MUMPS parameter array ICNTL() 2029 - ival - value of MUMPS ICNTL(icntl) 2030 2031 Options Database: 2032 . -mat_mumps_icntl_<icntl> <ival> 2033 2034 Level: beginner 2035 2036 References: 2037 . MUMPS Users' Guide 2038 2039 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2040 @*/ 2041 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival) 2042 { 2043 PetscErrorCode ierr; 2044 2045 PetscFunctionBegin; 2046 PetscValidType(F,1); 2047 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2048 PetscValidLogicalCollectiveInt(F,icntl,2); 2049 PetscValidLogicalCollectiveInt(F,ival,3); 2050 ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr); 2051 PetscFunctionReturn(0); 2052 } 2053 2054 /*@ 2055 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2056 2057 Logically Collective on Mat 2058 2059 Input Parameters: 2060 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2061 - icntl - index of MUMPS parameter array ICNTL() 2062 2063 Output Parameter: 2064 . ival - value of MUMPS ICNTL(icntl) 2065 2066 Level: beginner 2067 2068 References: 2069 . MUMPS Users' Guide 2070 2071 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2072 @*/ 2073 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival) 2074 { 2075 PetscErrorCode ierr; 2076 2077 PetscFunctionBegin; 2078 PetscValidType(F,1); 2079 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2080 PetscValidLogicalCollectiveInt(F,icntl,2); 2081 PetscValidIntPointer(ival,3); 2082 ierr = PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2083 PetscFunctionReturn(0); 2084 } 2085 2086 /* -------------------------------------------------------------------------------------------*/ 2087 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val) 2088 { 2089 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2090 2091 PetscFunctionBegin; 2092 mumps->id.CNTL(icntl) = val; 2093 PetscFunctionReturn(0); 2094 } 2095 2096 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val) 2097 { 2098 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2099 2100 PetscFunctionBegin; 2101 *val = mumps->id.CNTL(icntl); 2102 PetscFunctionReturn(0); 2103 } 2104 2105 /*@ 2106 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2107 2108 Logically Collective on Mat 2109 2110 Input Parameters: 2111 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2112 . icntl - index of MUMPS parameter array CNTL() 2113 - val - value of MUMPS CNTL(icntl) 2114 2115 Options Database: 2116 . -mat_mumps_cntl_<icntl> <val> 2117 2118 Level: beginner 2119 2120 References: 2121 . MUMPS Users' Guide 2122 2123 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2124 @*/ 2125 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val) 2126 { 2127 PetscErrorCode ierr; 2128 2129 PetscFunctionBegin; 2130 PetscValidType(F,1); 2131 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2132 PetscValidLogicalCollectiveInt(F,icntl,2); 2133 PetscValidLogicalCollectiveReal(F,val,3); 2134 ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr); 2135 PetscFunctionReturn(0); 2136 } 2137 2138 /*@ 2139 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2140 2141 Logically Collective on Mat 2142 2143 Input Parameters: 2144 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2145 - icntl - index of MUMPS parameter array CNTL() 2146 2147 Output Parameter: 2148 . val - value of MUMPS CNTL(icntl) 2149 2150 Level: beginner 2151 2152 References: 2153 . MUMPS Users' Guide 2154 2155 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2156 @*/ 2157 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val) 2158 { 2159 PetscErrorCode ierr; 2160 2161 PetscFunctionBegin; 2162 PetscValidType(F,1); 2163 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2164 PetscValidLogicalCollectiveInt(F,icntl,2); 2165 PetscValidRealPointer(val,3); 2166 ierr = PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2167 PetscFunctionReturn(0); 2168 } 2169 2170 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info) 2171 { 2172 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2173 2174 PetscFunctionBegin; 2175 *info = mumps->id.INFO(icntl); 2176 PetscFunctionReturn(0); 2177 } 2178 2179 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog) 2180 { 2181 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2182 2183 PetscFunctionBegin; 2184 *infog = mumps->id.INFOG(icntl); 2185 PetscFunctionReturn(0); 2186 } 2187 2188 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo) 2189 { 2190 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2191 2192 PetscFunctionBegin; 2193 *rinfo = mumps->id.RINFO(icntl); 2194 PetscFunctionReturn(0); 2195 } 2196 2197 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog) 2198 { 2199 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2200 2201 PetscFunctionBegin; 2202 *rinfog = mumps->id.RINFOG(icntl); 2203 PetscFunctionReturn(0); 2204 } 2205 2206 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS) 2207 { 2208 PetscErrorCode ierr; 2209 Mat Bt = NULL,Btseq = NULL; 2210 PetscBool flg; 2211 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2212 PetscScalar *aa; 2213 PetscInt spnr,*ia,*ja; 2214 2215 PetscFunctionBegin; 2216 PetscValidIntPointer(spRHS,2); 2217 ierr = PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);CHKERRQ(ierr); 2218 if (flg) { 2219 ierr = MatTransposeGetMat(spRHS,&Bt);CHKERRQ(ierr); 2220 } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be type MATTRANSPOSEMAT matrix"); 2221 2222 ierr = MatMumpsSetIcntl(F,30,1);CHKERRQ(ierr); 2223 2224 if (mumps->petsc_size > 1) { 2225 Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data; 2226 Btseq = b->A; 2227 } else { 2228 Btseq = Bt; 2229 } 2230 2231 if (!mumps->myid) { 2232 ierr = MatSeqAIJGetArray(Btseq,&aa);CHKERRQ(ierr); 2233 ierr = MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2234 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2235 2236 mumps->id.irhs_ptr = ia; 2237 mumps->id.irhs_sparse = ja; 2238 mumps->id.nz_rhs = ia[spnr] - 1; 2239 mumps->id.rhs_sparse = (MumpsScalar*)aa; 2240 } else { 2241 mumps->id.irhs_ptr = NULL; 2242 mumps->id.irhs_sparse = NULL; 2243 mumps->id.nz_rhs = 0; 2244 mumps->id.rhs_sparse = NULL; 2245 } 2246 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2247 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2248 2249 /* solve phase */ 2250 /*-------------*/ 2251 mumps->id.job = JOB_SOLVE; 2252 PetscMUMPS_c(mumps); 2253 if (mumps->id.INFOG(1) < 0) 2254 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 2255 2256 if (!mumps->myid) { 2257 ierr = MatSeqAIJRestoreArray(Btseq,&aa);CHKERRQ(ierr); 2258 ierr = MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2259 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2260 } 2261 PetscFunctionReturn(0); 2262 } 2263 2264 /*@ 2265 MatMumpsGetInverse - Get user-specified set of entries in inverse of A 2266 2267 Logically Collective on Mat 2268 2269 Input Parameters: 2270 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2271 - spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0] 2272 2273 Output Parameter: 2274 . spRHS - requested entries of inverse of A 2275 2276 Level: beginner 2277 2278 References: 2279 . MUMPS Users' Guide 2280 2281 .seealso: MatGetFactor(), MatCreateTranspose() 2282 @*/ 2283 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS) 2284 { 2285 PetscErrorCode ierr; 2286 2287 PetscFunctionBegin; 2288 PetscValidType(F,1); 2289 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2290 ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr); 2291 PetscFunctionReturn(0); 2292 } 2293 2294 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST) 2295 { 2296 PetscErrorCode ierr; 2297 Mat spRHS; 2298 2299 PetscFunctionBegin; 2300 ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr); 2301 ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr); 2302 ierr = MatDestroy(&spRHS);CHKERRQ(ierr); 2303 PetscFunctionReturn(0); 2304 } 2305 2306 /*@ 2307 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T 2308 2309 Logically Collective on Mat 2310 2311 Input Parameters: 2312 + F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface 2313 - spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0] 2314 2315 Output Parameter: 2316 . spRHST - requested entries of inverse of A^T 2317 2318 Level: beginner 2319 2320 References: 2321 . MUMPS Users' Guide 2322 2323 .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse() 2324 @*/ 2325 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST) 2326 { 2327 PetscErrorCode ierr; 2328 PetscBool flg; 2329 2330 PetscFunctionBegin; 2331 PetscValidType(F,1); 2332 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2333 ierr = PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 2334 if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix"); 2335 2336 ierr = PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));CHKERRQ(ierr); 2337 PetscFunctionReturn(0); 2338 } 2339 2340 /*@ 2341 MatMumpsGetInfo - Get MUMPS parameter INFO() 2342 2343 Logically Collective on Mat 2344 2345 Input Parameters: 2346 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2347 - icntl - index of MUMPS parameter array INFO() 2348 2349 Output Parameter: 2350 . ival - value of MUMPS INFO(icntl) 2351 2352 Level: beginner 2353 2354 References: 2355 . MUMPS Users' Guide 2356 2357 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2358 @*/ 2359 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival) 2360 { 2361 PetscErrorCode ierr; 2362 2363 PetscFunctionBegin; 2364 PetscValidType(F,1); 2365 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2366 PetscValidIntPointer(ival,3); 2367 ierr = PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2368 PetscFunctionReturn(0); 2369 } 2370 2371 /*@ 2372 MatMumpsGetInfog - Get MUMPS parameter INFOG() 2373 2374 Logically Collective on Mat 2375 2376 Input Parameters: 2377 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2378 - icntl - index of MUMPS parameter array INFOG() 2379 2380 Output Parameter: 2381 . ival - value of MUMPS INFOG(icntl) 2382 2383 Level: beginner 2384 2385 References: 2386 . MUMPS Users' Guide 2387 2388 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2389 @*/ 2390 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival) 2391 { 2392 PetscErrorCode ierr; 2393 2394 PetscFunctionBegin; 2395 PetscValidType(F,1); 2396 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2397 PetscValidIntPointer(ival,3); 2398 ierr = PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2399 PetscFunctionReturn(0); 2400 } 2401 2402 /*@ 2403 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 2404 2405 Logically Collective on Mat 2406 2407 Input Parameters: 2408 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2409 - icntl - index of MUMPS parameter array RINFO() 2410 2411 Output Parameter: 2412 . val - value of MUMPS RINFO(icntl) 2413 2414 Level: beginner 2415 2416 References: 2417 . MUMPS Users' Guide 2418 2419 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2420 @*/ 2421 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val) 2422 { 2423 PetscErrorCode ierr; 2424 2425 PetscFunctionBegin; 2426 PetscValidType(F,1); 2427 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2428 PetscValidRealPointer(val,3); 2429 ierr = PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2430 PetscFunctionReturn(0); 2431 } 2432 2433 /*@ 2434 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 2435 2436 Logically Collective on Mat 2437 2438 Input Parameters: 2439 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2440 - icntl - index of MUMPS parameter array RINFOG() 2441 2442 Output Parameter: 2443 . val - value of MUMPS RINFOG(icntl) 2444 2445 Level: beginner 2446 2447 References: 2448 . MUMPS Users' Guide 2449 2450 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2451 @*/ 2452 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val) 2453 { 2454 PetscErrorCode ierr; 2455 2456 PetscFunctionBegin; 2457 PetscValidType(F,1); 2458 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2459 PetscValidRealPointer(val,3); 2460 ierr = PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2461 PetscFunctionReturn(0); 2462 } 2463 2464 /*MC 2465 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 2466 distributed and sequential matrices via the external package MUMPS. 2467 2468 Works with MATAIJ and MATSBAIJ matrices 2469 2470 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 2471 2472 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below. 2473 2474 Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver 2475 2476 Options Database Keys: 2477 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 2478 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 2479 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 2480 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 2481 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 2482 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis 2483 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 2484 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 2485 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 2486 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 2487 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 2488 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 2489 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 2490 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 2491 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can allocate per processor 2492 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 2493 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 2494 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix 2495 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering 2496 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 2497 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 2498 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 2499 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 2500 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 2501 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 2502 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 2503 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 2504 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 2505 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 2506 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 2507 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 2508 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 2509 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 2510 Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 2511 2512 Level: beginner 2513 2514 Notes: 2515 MUMPS Cholesky does not handle (complex) Hermitian matrices http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf so using it will error if the matrix is Hermitian. 2516 2517 When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling 2518 $ KSPGetPC(ksp,&pc); 2519 $ PCFactorGetMatrix(pc,&mat); 2520 $ MatMumpsGetInfo(mat,....); 2521 $ MatMumpsGetInfog(mat,....); etc. 2522 Or you can run with -ksp_error_if_not_converged and the program will be stopped and the information printed in the error message. 2523 2524 Two modes to run MUMPS/PETSc with OpenMP 2525 2526 $ Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 2527 $ threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". 2528 2529 $ -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example, 2530 $ if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 2531 2532 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 2533 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc 2534 (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 2535 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 2536 (PETSc will automatically try to utilized a threaded BLAS if --with-openmp is provided). 2537 2538 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 2539 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 2540 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 2541 are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set 2542 by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs. 2543 In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets, 2544 if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind 2545 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half 2546 cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 2547 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 2548 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 2549 examine the mapping result. 2550 2551 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts, 2552 for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc 2553 calls omp_set_num_threads(m) internally before calling MUMPS. 2554 2555 References: 2556 + 1. - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 2557 - 2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017. 2558 2559 .seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCGetFactor(), PCFactorGetMatrix() 2560 2561 M*/ 2562 2563 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type) 2564 { 2565 PetscFunctionBegin; 2566 *type = MATSOLVERMUMPS; 2567 PetscFunctionReturn(0); 2568 } 2569 2570 /* MatGetFactor for Seq and MPI AIJ matrices */ 2571 static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F) 2572 { 2573 Mat B; 2574 PetscErrorCode ierr; 2575 Mat_MUMPS *mumps; 2576 PetscBool isSeqAIJ; 2577 2578 PetscFunctionBegin; 2579 /* Create the factorization matrix */ 2580 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr); 2581 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2582 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2583 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2584 ierr = MatSetUp(B);CHKERRQ(ierr); 2585 2586 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2587 2588 B->ops->view = MatView_MUMPS; 2589 B->ops->getinfo = MatGetInfo_MUMPS; 2590 2591 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2592 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2593 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2594 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2595 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2596 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2597 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2598 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2599 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2600 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2601 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2602 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2603 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2604 2605 if (ftype == MAT_FACTOR_LU) { 2606 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 2607 B->factortype = MAT_FACTOR_LU; 2608 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 2609 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 2610 mumps->sym = 0; 2611 } else { 2612 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 2613 B->factortype = MAT_FACTOR_CHOLESKY; 2614 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 2615 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 2616 #if defined(PETSC_USE_COMPLEX) 2617 mumps->sym = 2; 2618 #else 2619 if (A->spd_set && A->spd) mumps->sym = 1; 2620 else mumps->sym = 2; 2621 #endif 2622 } 2623 2624 /* set solvertype */ 2625 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2626 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2627 2628 B->ops->destroy = MatDestroy_MUMPS; 2629 B->data = (void*)mumps; 2630 2631 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2632 2633 *F = B; 2634 PetscFunctionReturn(0); 2635 } 2636 2637 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 2638 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F) 2639 { 2640 Mat B; 2641 PetscErrorCode ierr; 2642 Mat_MUMPS *mumps; 2643 PetscBool isSeqSBAIJ; 2644 2645 PetscFunctionBegin; 2646 if (ftype != MAT_FACTOR_CHOLESKY) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Cannot use PETSc SBAIJ matrices with MUMPS LU, use AIJ matrix"); 2647 if (A->rmap->bs > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Cannot use PETSc SBAIJ matrices with block size > 1 with MUMPS Cholesky, use AIJ matrix instead"); 2648 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr); 2649 /* Create the factorization matrix */ 2650 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2651 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2652 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2653 ierr = MatSetUp(B);CHKERRQ(ierr); 2654 2655 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2656 if (isSeqSBAIJ) { 2657 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 2658 } else { 2659 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 2660 } 2661 2662 B->ops->getinfo = MatGetInfo_External; 2663 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 2664 B->ops->view = MatView_MUMPS; 2665 2666 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2667 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2668 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2669 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2670 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2671 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2672 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2673 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2674 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2675 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2676 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2677 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2678 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2679 2680 B->factortype = MAT_FACTOR_CHOLESKY; 2681 #if defined(PETSC_USE_COMPLEX) 2682 mumps->sym = 2; 2683 #else 2684 if (A->spd_set && A->spd) mumps->sym = 1; 2685 else mumps->sym = 2; 2686 #endif 2687 2688 /* set solvertype */ 2689 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2690 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2691 2692 B->ops->destroy = MatDestroy_MUMPS; 2693 B->data = (void*)mumps; 2694 2695 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2696 2697 *F = B; 2698 PetscFunctionReturn(0); 2699 } 2700 2701 static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F) 2702 { 2703 Mat B; 2704 PetscErrorCode ierr; 2705 Mat_MUMPS *mumps; 2706 PetscBool isSeqBAIJ; 2707 2708 PetscFunctionBegin; 2709 /* Create the factorization matrix */ 2710 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr); 2711 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2712 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2713 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2714 ierr = MatSetUp(B);CHKERRQ(ierr); 2715 2716 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2717 if (ftype == MAT_FACTOR_LU) { 2718 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 2719 B->factortype = MAT_FACTOR_LU; 2720 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 2721 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 2722 mumps->sym = 0; 2723 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead\n"); 2724 2725 B->ops->getinfo = MatGetInfo_External; 2726 B->ops->view = MatView_MUMPS; 2727 2728 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2729 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2730 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2731 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2732 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2733 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2734 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2735 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2736 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2737 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2738 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2739 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2740 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2741 2742 /* set solvertype */ 2743 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2744 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2745 2746 B->ops->destroy = MatDestroy_MUMPS; 2747 B->data = (void*)mumps; 2748 2749 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2750 2751 *F = B; 2752 PetscFunctionReturn(0); 2753 } 2754 2755 /* MatGetFactor for Seq and MPI SELL matrices */ 2756 static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F) 2757 { 2758 Mat B; 2759 PetscErrorCode ierr; 2760 Mat_MUMPS *mumps; 2761 PetscBool isSeqSELL; 2762 2763 PetscFunctionBegin; 2764 /* Create the factorization matrix */ 2765 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);CHKERRQ(ierr); 2766 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2767 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2768 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2769 ierr = MatSetUp(B);CHKERRQ(ierr); 2770 2771 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2772 2773 B->ops->view = MatView_MUMPS; 2774 B->ops->getinfo = MatGetInfo_MUMPS; 2775 2776 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2777 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2778 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2779 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2780 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2781 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2782 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2783 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2784 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2785 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2786 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2787 2788 if (ftype == MAT_FACTOR_LU) { 2789 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 2790 B->factortype = MAT_FACTOR_LU; 2791 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 2792 else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 2793 mumps->sym = 0; 2794 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 2795 2796 /* set solvertype */ 2797 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2798 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2799 2800 B->ops->destroy = MatDestroy_MUMPS; 2801 B->data = (void*)mumps; 2802 2803 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2804 2805 *F = B; 2806 PetscFunctionReturn(0); 2807 } 2808 2809 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 2810 { 2811 PetscErrorCode ierr; 2812 2813 PetscFunctionBegin; 2814 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2815 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2816 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2817 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2818 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 2819 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2820 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2821 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2822 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2823 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 2824 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);CHKERRQ(ierr); 2825 PetscFunctionReturn(0); 2826 } 2827 2828