/*
    Provides an interface to the MUMPS sparse solver
*/

#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sell/mpi/mpisell.h>

EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#include <cmumps_c.h>
#else
#include <zmumps_c.h>
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#include <smumps_c.h>
#else
#include <dmumps_c.h>
#endif
#endif
EXTERN_C_END
#define JOB_INIT -1
#define JOB_FACTSYMBOLIC 1
#define JOB_FACTNUMERIC 2
#define JOB_SOLVE 3
#define JOB_END -2

/* calls to MUMPS */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c cmumps_c
#else
#define MUMPS_c zmumps_c
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c smumps_c
#else
#define MUMPS_c dmumps_c
#endif
#endif

/* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore the CPUs the master ranks can run on */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
#define PetscMUMPS_c(mumps) \
  do { \
    if (mumps->use_petsc_omp_support) { \
      if (mumps->is_omp_master) { \
        ierr = PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl);CHKERRQ(ierr); \
        MUMPS_c(&mumps->id); \
        ierr = PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl);CHKERRQ(ierr); \
      } \
      ierr = PetscOmpCtrlBarrier(mumps->omp_ctrl);CHKERRQ(ierr); \
    } else { \
      MUMPS_c(&mumps->id); \
    } \
  } while(0)
#else
#define PetscMUMPS_c(mumps) \
  do { MUMPS_c(&mumps->id); } while (0)
#endif

/* declare MumpsScalar */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MumpsScalar mumps_complex
#else
#define MumpsScalar mumps_double_complex
#endif
#else
#define MumpsScalar PetscScalar
#endif

/* macros so that indices match MUMPS documentation */
#define ICNTL(I) icntl[(I)-1]
#define CNTL(I) cntl[(I)-1]
#define INFOG(I) infog[(I)-1]
#define INFO(I) info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I) rinfo[(I)-1]
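/*
   For example (illustrative note): with the macros above, the 1-based indices used in the
   MUMPS manual map directly onto the 0-based C arrays, so

     mumps->id.ICNTL(7) = 3;     expands to     mumps->id.icntl[6] = 3;

   and the same convention holds for CNTL/INFO/INFOG/RINFO/RINFOG.
*/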
typedef struct {
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;
#else
  ZMUMPS_STRUC_C id;
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
#else
  DMUMPS_STRUC_C id;
#endif
#endif

  MatStructure matstruc;
  PetscMPIInt  myid,petsc_size;
  PetscInt     *irn,*jcn,nz,sym;
  PetscScalar  *val;
  MPI_Comm     mumps_comm;
  PetscInt     ICNTL9_pre;          /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter   scat_rhs, scat_sol;  /* used by MatSolve() */
  Vec          b_seq,x_seq;
  PetscInt     ninfo,*info;         /* display INFO */
  PetscInt     sizeredrhs;
  PetscScalar  *schur_sol;
  PetscInt     schur_sizesol;

  PetscBool    use_petsc_omp_support;
  PetscOmpCtrl omp_ctrl;            /* an OpenMP controller that guarantees blocked processes release their CPUs (MPI_Barrier does not have this guarantee) */
  MPI_Comm     petsc_comm,omp_comm; /* petsc_comm is the PETSc matrix's comm */
  PetscMPIInt  mpinz;               /* on the master rank, nz = sum(mpinz) over omp_comm; on other ranks, mpinz = nz */
  PetscMPIInt  omp_comm_size;
  PetscBool    is_omp_master;       /* is this rank the master of omp_comm */
  PetscMPIInt  *recvcount,*displs;

  PetscErrorCode (*ConvertToTriples)(Mat, int, MatReuse, int*, int**, int**, PetscScalar**);
} Mat_MUMPS;

extern PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*);

static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscFree2(mumps->id.listvar_schur,mumps->id.schur);CHKERRQ(ierr);
  ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
  ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
  mumps->id.size_schur = 0;
  mumps->id.schur_lld  = 0;
  mumps->id.ICNTL(19)  = 0;
  PetscFunctionReturn(0);
}

/* solve with rhs in mumps->id.redrhs and return in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS            *mumps=(Mat_MUMPS*)F->data;
  Mat                  S,B,X;
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;
  PetscErrorCode       ierr;

  PetscFunctionBegin;
  ierr = MatFactorFactorizeSchurComplement(F);CHKERRQ(ierr);
  ierr = MatFactorGetSchurComplement(F,&S,&schurstatus);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);CHKERRQ(ierr);
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);CHKERRQ(ierr);
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatMatSolveTranspose(S,B,X);CHKERRQ(ierr);
    } else {
      ierr = MatMatSolve(S,B,X);CHKERRQ(ierr);
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    sizesol = mumps->id.nrhs*mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
      ierr = PetscMalloc1(sizesol,&mumps->schur_sol);CHKERRQ(ierr);
      mumps->schur_sizesol = sizesol;
    }
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);CHKERRQ(ierr);
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatTransposeMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr);
    } else {
      ierr = MatMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr);
    }
    ierr = MatCopy(X,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %D",F->schur_status);
    break;
  }
  ierr = MatFactorRestoreSchurComplement(F,&S,schurstatus);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = MatDestroy(&X);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)F->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when the Schur complement has not been computed */
    PetscFunctionReturn(0);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = mumps->id.size_schur;
      ierr = PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);CHKERRQ(ierr);
      mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs;
    }
    mumps->id.ICNTL(26) = 1; /* condensation phase */
  } else { /* prepare for the expansion step */
    /* solve the Schur complement (this has to be done by the MUMPS user, so basically us) */
    ierr = MatMumpsSolveSchur_Private(F);CHKERRQ(ierr);
    mumps->id.ICNTL(26) = 2; /* expansion phase */
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free the MUMPS internal array for redrhs if we have solved for multiple rhs, in order to save memory space */
    if (mumps->id.nrhs > 1) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = 0;
      mumps->sizeredrhs = 0;
    }
  }
  PetscFunctionReturn(0);
}
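/*
   Sketch (illustrative, not compiled) of how the solve routines below drive the two helpers
   above when Schur indices have been set through MatFactorSetSchurIS():

     MatMumpsHandleSchur_Private(F,PETSC_FALSE);    -- ICNTL(26)=1: the next solve condenses
                                                       the rhs onto mumps->id.redrhs
     mumps->id.job = JOB_SOLVE; PetscMUMPS_c(mumps);
     MatMumpsHandleSchur_Private(F,PETSC_TRUE);     -- solve the Schur system with
                                                       MatMumpsSolveSchur_Private(), then run
                                                       the ICNTL(26)=2 expansion solve
*/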
/*
  MatConvertToTriples_A_B - convert a PETSc matrix to triples: row[nz], col[nz], val[nz]

  input:
    A       - matrix in aij, baij, or sbaij (bs=1) format
    shift   - 0: C style output triples; 1: Fortran style output triples.
    reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triples
              MAT_REUSE_MATRIX:   only the values in the v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and col indices, matrix values (matrix triples)

  The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
  freed with PetscFree(mumps->irn). This is not ideal code; the fact that v is ONLY sometimes part of mumps->irn means
  that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
*/
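/*
   Worked example (illustrative): for the 2x2 sequential AIJ matrix

     [ 1 2 ]
     [ 0 3 ]

   a conversion with shift=1 (Fortran-style indices, as MUMPS requires) produces

     nnz = 3,  r = {1,1,2},  c = {1,2,2},  v = {1,2,3}
*/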
PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscInt *ai,*aj,*ajj,M=A->rmap->n;
  PetscInt       nz,rnz,i,j;
  PetscErrorCode ierr;
  PetscInt       *row,*col;
  Mat_SeqAIJ     *aa=(Mat_SeqAIJ*)A->data;

  PetscFunctionBegin;
  *v=aa->a;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz;
    ai   = aa->i;
    aj   = aa->j;
    *nnz = nz;
    ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr);
    col  = row + nz;

    nz = 0;
    for (i=0; i<M; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        row[nz] = i+shift; col[nz++] = ajj[j] + shift;
      }
    }
    *r = row; *c = col;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
  PetscInt    *ptr;

  PetscFunctionBegin;
  *v = a->val;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscInt       nz,i,j,row;
    PetscErrorCode ierr;

    nz   = a->sliidx[a->totalslices];
    *nnz = nz;
    ierr = PetscMalloc1(2*nz, &ptr);CHKERRQ(ierr);
    *r   = ptr;
    *c   = ptr + nz;

    for (i=0; i<a->totalslices; i++) {
      for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
        *ptr++ = 8*i + row + shift;
      }
    }
    for (i=0;i<nz;i++) *ptr++ = a->colidx[i] + shift;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  Mat_SeqBAIJ    *aa=(Mat_SeqBAIJ*)A->data;
  const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2;
  PetscInt       bs,M,nz,idx=0,rnz,i,j,k,m;
  PetscErrorCode ierr;
  PetscInt       *row,*col;

  PetscFunctionBegin;
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  M    = A->rmap->N/bs;
  *v   = aa->a;
  if (reuse == MAT_INITIAL_MATRIX) {
    ai   = aa->i; aj = aa->j;
    nz   = bs2*aa->nz;
    *nnz = nz;
    ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr);
    col  = row + nz;

    for (i=0; i<M; i++) {
      ajj = aj + ai[i];
      rnz = ai[i+1] - ai[i];
      for (k=0; k<rnz; k++) {
        for (j=0; j<bs; j++) {
          for (m=0; m<bs; m++) {
            row[idx]   = i*bs + m + shift;
            col[idx++] = bs*(ajj[k]) + j + shift;
          }
        }
      }
    }
    *r = row; *c = col;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscInt *ai, *aj,*ajj,M=A->rmap->n;
  PetscInt       nz,rnz,i,j;
  PetscErrorCode ierr;
  PetscInt       *row,*col;
  Mat_SeqSBAIJ   *aa=(Mat_SeqSBAIJ*)A->data;
#if defined(PETSC_USE_COMPLEX)
  PetscBool      hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
  if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  *v = aa->a;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz;
    ai   = aa->i;
    aj   = aa->j;
    *v   = aa->a;
    *nnz = nz;
    ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr);
    col  = row + nz;

    nz = 0;
    for (i=0; i<M; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        row[nz] = i+shift; col[nz++] = ajj[j] + shift;
      }
    }
    *r = row; *c = col;
  }
  PetscFunctionReturn(0);
}
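/*
   Illustrative note on the single-allocation layout used by the converters below that also
   return values (see the comment above MatConvertToTriples_seqaij_seqaij()):

     PetscMalloc(2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar), &row);
     col = row + nz;                    -- second block of nz PetscInt: column indices
     val = (PetscScalar*)(col + nz);    -- trailing block of nz PetscScalar: values

   so MatDestroy_MUMPS() only needs a single PetscFree(mumps->irn).
*/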
PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscInt    *ai,*aj,*ajj,*adiag,M=A->rmap->n;
  PetscInt          nz,rnz,i,j;
  const PetscScalar *av,*v1;
  PetscScalar       *val;
  PetscErrorCode    ierr;
  PetscInt          *row,*col;
  Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
  PetscBool         missing;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
  if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  ai    = aa->i; aj = aa->j; av = aa->a;
  adiag = aa->diag;
  ierr  = MatMissingDiagonal_SeqAIJ(A,&missing,&i);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    /* count nz in the upper triangular part of A */
    nz = 0;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            nz++;
          }
        } else {
          nz += ai[i+1] - adiag[i];
        }
      }
    } else {
      for (i=0; i<M; i++) nz += ai[i+1] - adiag[i];
    }
    *nnz = nz;

    ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr);
    col  = row + nz;
    val  = (PetscScalar*)(col + nz);

    nz = 0;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            row[nz] = i+shift;
            col[nz] = aj[j]+shift;
            val[nz] = av[j];
            nz++;
          }
        } else {
          rnz = ai[i+1] - adiag[i];
          ajj = aj + adiag[i];
          v1  = av + adiag[i];
          for (j=0; j<rnz; j++) {
            row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i=0; i<M; i++) {
        rnz = ai[i+1] - adiag[i];
        ajj = aj + adiag[i];
        v1  = av + adiag[i];
        for (j=0; j<rnz; j++) {
          row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j];
        }
      }
    }
    *r = row; *c = col; *v = val;
  } else {
    nz = 0; val = *v;
    if (missing) {
      for (i=0; i <M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            val[nz++] = av[j];
          }
        } else {
          rnz = ai[i+1] - adiag[i];
          v1  = av + adiag[i];
          for (j=0; j<rnz; j++) {
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i=0; i <M; i++) {
        rnz = ai[i+1] - adiag[i];
        v1  = av + adiag[i];
        for (j=0; j<rnz; j++) {
          val[nz++] = v1[j];
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
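/*
   Illustrative example for the upper-triangular extraction above: for

     [ 1 2 0 ]
     [ 2 4 5 ]
     [ 0 5 6 ]

   stored as AIJ, aa->diag marks the diagonal entry of each row, so only the entries
   (0,0),(0,1),(1,1),(1,2),(2,2) are kept, i.e. nnz = 5 triples for the MUMPS Cholesky input.
*/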
PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt          rstart,nz,i,j,jj,irow,countA,countB;
  PetscInt          *row,*col;
  const PetscScalar *av, *bv,*v1,*v2;
  PetscScalar       *val;
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ*)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
  if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  rstart = A->rmap->rstart;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;
  av = aa->a;
  bv = bb->a;

  garray = mat->garray;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz + bb->nz;
    *nnz = nz;
    ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr);
    col  = row + nz;
    val  = (PetscScalar*)(col + nz);

    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j=0; j < countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt          rstart,nz,i,j,jj,irow,countA,countB;
  PetscInt          *row,*col;
  const PetscScalar *av, *bv,*v1,*v2;
  PetscScalar       *val;
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ        *aa  = (Mat_SeqAIJ*)(mat->A)->data;
  Mat_SeqAIJ        *bb  = (Mat_SeqAIJ*)(mat->B)->data;

  PetscFunctionBegin;
  rstart = A->rmap->rstart;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;
  av = aa->a;
  bv = bb->a;

  garray = mat->garray;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz + bb->nz;
    *nnz = nz;
    ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr);
    col  = row + nz;
    val  = (PetscScalar*)(col + nz);

    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j=0; j < countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  PetscFunctionReturn(0);
}
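/*
   Illustrative note on the MPI(S)AIJ storage walked above: each rank stores its rows as a
   "diagonal" block A (columns owned by this rank) plus an "off-diagonal" block B whose
   compressed column indices are translated to global columns through mat->garray:

     global column of an A-part entry:  rstart + ajj[j]
     global column of a B-part entry:   garray[bjj[j]]
*/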
PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ*)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
  const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj;
  const PetscInt    *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart;
  const PetscInt    bs2=mat->bs2;
  PetscErrorCode    ierr;
  PetscInt          bs,nz,i,j,k,n,jj,irow,countA,countB,idx;
  PetscInt          *row,*col;
  const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2;
  PetscScalar       *val;

  PetscFunctionBegin;
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = bs2*(aa->nz + bb->nz);
    *nnz = nz;
    ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr);
    col  = row + nz;
    val  = (PetscScalar*)(col + nz);

    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }

  jj = 0; irow = rstart;
  for (i=0; i<mbs; i++) {
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    ajj    = aj + ai[i];
    bjj    = bj + bi[i];
    v1     = av + bs2*ai[i];
    v2     = bv + bs2*bi[i];

    idx = 0;
    /* A-part */
    for (k=0; k<countA; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            row[jj] = irow + n + shift;
            col[jj] = rstart + bs*ajj[k] + j + shift;
          }
          val[jj++] = v1[idx++];
        }
      }
    }

    idx = 0;
    /* B-part */
    for (k=0; k<countB; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            row[jj] = irow + n + shift;
            col[jj] = bs*garray[bjj[k]] + j + shift;
          }
          val[jj++] = v2[idx++];
        }
      }
    }
    irow += bs;
  }
  PetscFunctionReturn(0);
}
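/*
   Illustrative example of the block expansion above: with bs = 2, the block in block-row i
   and block-column ajj[k] contributes bs2 = 4 triples. The block values v1 are stored
   column by column, so the (j,n) loops emit, in order, the entries

     (irow+0, bs*ajj[k]+0), (irow+1, bs*ajj[k]+0), (irow+0, bs*ajj[k]+1), (irow+1, bs*ajj[k]+1)
*/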
PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscInt    *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt          rstart,nz,nza,nzb,i,j,jj,irow,countA,countB;
  PetscInt          *row,*col;
  const PetscScalar *av, *bv,*v1,*v2;
  PetscScalar       *val;
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ        *aa  = (Mat_SeqAIJ*)(mat->A)->data;
  Mat_SeqAIJ        *bb  = (Mat_SeqAIJ*)(mat->B)->data;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
  if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  ai     = aa->i;
  aj     = aa->j;
  adiag  = aa->diag;
  bi     = bb->i;
  bj     = bb->j;
  garray = mat->garray;
  av     = aa->a;
  bv     = bb->a;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nza = 0;    /* num of upper triangular entries in mat->A, including diagonals */
    nzb = 0;    /* num of upper triangular entries in mat->B */
    for (i=0; i<m; i++) {
      nza   += (ai[i+1] - adiag[i]);
      countB = bi[i+1] - bi[i];
      bjj    = bj + bi[i];
      for (j=0; j<countB; j++) {
        if (garray[bjj[j]] > rstart) nzb++;
      }
    }

    nz   = nza + nzb; /* total nz of the upper triangular part of mat */
    *nnz = nz;
    ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr);
    col  = row + nz;
    val  = (PetscScalar*)(col + nz);

    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
    v1     = av + adiag[i];
    countA = ai[i+1] - adiag[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j=0; j < countB; j++) {
      if (garray[bjj[j]] > rstart) {
        if (reuse == MAT_INITIAL_MATRIX) {
          row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
        }
        val[jj++] = v2[j];
      }
    }
    irow++;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
  ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr);
  ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr);
  ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr);
  ierr = PetscFree(mumps->irn);CHKERRQ(ierr);
  ierr = PetscFree(mumps->info);CHKERRQ(ierr);
  ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr);
  mumps->id.job = JOB_END;
  PetscMUMPS_c(mumps);
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  if (mumps->use_petsc_omp_support) { ierr = PetscOmpCtrlDestroy(&mumps->omp_ctrl);CHKERRQ(ierr); }
#endif
  ierr = PetscFree2(mumps->recvcount,mumps->displs);CHKERRQ(ierr);
  ierr = PetscFree(A->data);CHKERRQ(ierr);

  /* clear composed functions */
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
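/*
   Minimal usage sketch (illustrative; variable names are hypothetical and error checking is
   omitted) showing how the solve routines below are normally reached through the PETSc
   factor interface:

     Mat           F;
     IS            rp,cp;
     MatFactorInfo finfo;
     MatFactorInfoInitialize(&finfo);
     MatGetOrdering(A,MATORDERINGNATURAL,&rp,&cp);
     MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);
     MatLUFactorSymbolic(F,A,rp,cp,&finfo);
     MatLUFactorNumeric(F,A,&finfo);
     MatSolve(F,b,x);               -- dispatches to MatSolve_MUMPS() below

   or simply from the command line: -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps
*/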
PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS        *mumps=(Mat_MUMPS*)A->data;
  PetscScalar      *array;
  Vec              b_seq;
  IS               is_iden,is_petsc;
  PetscErrorCode   ierr;
  PetscInt         i;
  PetscBool        second_solve = PETSC_FALSE;
  static PetscBool cite1 = PETSC_FALSE,cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1);CHKERRQ(ierr);
  ierr = PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",&cite2);CHKERRQ(ierr);

  if (A->factorerrortype) {
    ierr = PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    ierr = VecSetInf(x);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  mumps->id.ICNTL(20)= 0; /* dense RHS */
  mumps->id.nrhs     = 1;
  b_seq = mumps->b_seq;
  if (mumps->petsc_size > 1) {
    /* MUMPS only supports a centralized rhs. Scatter b into a sequential rhs vector */
    ierr = VecScatterBegin(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mumps->myid) {ierr = VecGetArray(b_seq,&array);CHKERRQ(ierr);}
  } else {  /* petsc_size == 1 */
    ierr = VecCopy(b,x);CHKERRQ(ierr);
    ierr = VecGetArray(x,&array);CHKERRQ(ierr);
  }
  if (!mumps->myid) { /* define rhs on the host */
    mumps->id.nrhs = 1;
    mumps->id.rhs  = (MumpsScalar*)array;
  }

  /*
     handle condensation step of Schur complement (if any)
     We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
     According to the MUMPS (5.0.0) manual, any value should be harmless during the factorization phase.
     Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
     This requires an extra call to PetscMUMPS_c and the computation of the factors for S.
  */
  if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
    if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported by PETSc\n");
    second_solve = PETSC_TRUE;
    ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
  }
  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

  /* handle expansion step of Schur complement (if any) */
  if (second_solve) {
    ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
  }

  if (mumps->petsc_size > 1) { /* convert the MUMPS distributed solution to the PETSc mpi x */
    if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
      /* when id.ICNTL(9) changes, the contents of isol_loc may change (though not its size, lsol_loc), so recreate scat_sol */
      ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
    }
    if (!mumps->scat_sol) { /* create scatter scat_sol */
      ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */
      for (i=0; i<mumps->id.lsol_loc; i++) {
        mumps->id.isol_loc[i] -= 1; /* change Fortran style to C style */
      }
      ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,mumps->id.isol_loc,PETSC_COPY_VALUES,&is_petsc);CHKERRQ(ierr); /* to */
      ierr = VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr);
      ierr = ISDestroy(&is_iden);CHKERRQ(ierr);
      ierr = ISDestroy(&is_petsc);CHKERRQ(ierr);

      mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save the current value of id.ICNTL(9) */
    }

    ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }

  if (mumps->petsc_size > 1) {if (!mumps->myid) {ierr = VecRestoreArray(b_seq,&array);CHKERRQ(ierr);}}
  else {ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);}

  ierr = PetscLogFlops(2.0*mumps->id.RINFO(3));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mumps->id.ICNTL(9) = 0;
  ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr);
  mumps->id.ICNTL(9) = 1;
  PetscFunctionReturn(0);
}
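/*
   Note (illustrative): MUMPS solves A x = b when ICNTL(9)=1 (its default) and A^T x = b for
   any other value, which is why the wrapper above sets ICNTL(9)=0 around the solve; e.g.
   KSPSolveTranspose() reaches MatSolveTranspose_MUMPS() this way.
*/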
PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X)
{
  PetscErrorCode    ierr;
  Mat               Bt = NULL;
  PetscBool         flg, flgT;
  Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
  PetscInt          i,nrhs,M;
  PetscScalar       *array;
  const PetscScalar *rbray;
  PetscInt          lsol_loc,nlsol_loc,*isol_loc,*idxx,*isol_loc_save,iidx = 0;
  PetscScalar       *bray,*sol_loc,*sol_loc_save;
  IS                is_to,is_from;
  PetscInt          k,proc,j,m,myrstart;
  const PetscInt    *rstart;
  Vec               v_mpi,b_seq,msol_loc;
  VecScatter        scat_rhs,scat_sol;
  PetscScalar       *aa;
  PetscInt          spnr,*ia,*ja;
  Mat_MPIAIJ        *b = NULL;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");

  ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (flg) { /* dense B */
    if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution");
    mumps->id.ICNTL(20)= 0; /* dense RHS */
  } else { /* sparse B */
    ierr = PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);CHKERRQ(ierr);
    if (flgT) { /* input B is the transpose of the actual RHS matrix,
                   because MUMPS requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
      ierr = MatTransposeGetMat(B,&Bt);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix");
    mumps->id.ICNTL(20)= 1; /* sparse RHS */
  }

  ierr = MatGetSize(B,&M,&nrhs);CHKERRQ(ierr);
  mumps->id.nrhs = nrhs;
  mumps->id.lrhs = M;
  mumps->id.rhs  = NULL;

  if (mumps->petsc_size == 1) {
    PetscScalar *aa;
    PetscInt    spnr,*ia,*ja;
    PetscBool   second_solve = PETSC_FALSE;

    ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
    mumps->id.rhs = (MumpsScalar*)array;

    if (!Bt) { /* dense B */
      /* copy B to X */
      ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr);
      ierr = PetscArraycpy(array,rbray,M*nrhs);CHKERRQ(ierr);
      ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr);
    } else { /* sparse B */
      ierr = MatSeqAIJGetArray(Bt,&aa);CHKERRQ(ierr);
      ierr = MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      /* MUMPS requires ia and ja to start at 1! */
      mumps->id.irhs_ptr    = ia;
      mumps->id.irhs_sparse = ja;
      mumps->id.nz_rhs      = ia[spnr] - 1;
      mumps->id.rhs_sparse  = (MumpsScalar*)aa;
    }
    /* handle condensation step of Schur complement (if any) */
    if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
      second_solve = PETSC_TRUE;
      ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
    }
    /* solve phase */
    /*-------------*/
    mumps->id.job = JOB_SOLVE;
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

    /* handle expansion step of Schur complement (if any) */
    if (second_solve) {
      ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
    }
    if (Bt) { /* sparse B */
      ierr = MatSeqAIJRestoreArray(Bt,&aa);CHKERRQ(ierr);
      ierr = MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
    ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/
  if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported by PETSc\n");

  /* create msol_loc to hold the MUMPS local solution */
  isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
  sol_loc_save  = (PetscScalar*)mumps->id.sol_loc;

  lsol_loc  = mumps->id.lsol_loc;
  nlsol_loc = nrhs*lsol_loc; /* length of sol_loc */
  ierr = PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);CHKERRQ(ierr);
  mumps->id.sol_loc  = (MumpsScalar*)sol_loc;
  mumps->id.isol_loc = isol_loc;

  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);CHKERRQ(ierr);

  if (!Bt) { /* dense B */
    /* TODO: Because of the non-contiguous indices, the vecscatter scat_rhs created below cannot be implemented with an
       MPI_Gather, resulting in very inefficient communication. An optimization is to use VecScatterCreateToZero to
       gather B to rank 0. Then on rank 0, re-arrange B into the desired order, which is a local operation.
    */

    /* scatter v_mpi to b_seq because MUMPS only supports a centralized rhs */
    /* wrap dense rhs matrix B into a vector v_mpi */
    ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr);
    ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr);
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr);

    /* scatter v_mpi to b_seq in proc[0]. MUMPS requires the rhs to be centralized on the host! */
    if (!mumps->myid) {
      PetscInt *idx;
      /* idx: maps from the k-th index of v_mpi to the (i,j)-th global entry of B */
      ierr = PetscMalloc1(nrhs*M,&idx);CHKERRQ(ierr);
      ierr = MatGetOwnershipRanges(B,&rstart);CHKERRQ(ierr);
      k = 0;
      for (proc=0; proc<mumps->petsc_size; proc++){
        for (j=0; j<nrhs; j++){
          for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i;
        }
      }

      ierr = VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);CHKERRQ(ierr);
      ierr = ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);CHKERRQ(ierr);
    } else {
      ierr = VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);CHKERRQ(ierr);
    }
    ierr = VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);CHKERRQ(ierr);
    ierr = VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = ISDestroy(&is_to);CHKERRQ(ierr);
    ierr = ISDestroy(&is_from);CHKERRQ(ierr);
    ierr = VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

    if (!mumps->myid) { /* define rhs on the host */
      ierr = VecGetArray(b_seq,&bray);CHKERRQ(ierr);
      mumps->id.rhs = (MumpsScalar*)bray;
      ierr = VecRestoreArray(b_seq,&bray);CHKERRQ(ierr);
    }

  } else { /* sparse B */
    b = (Mat_MPIAIJ*)Bt->data;

    /* wrap dense X into a vector v_mpi */
    ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr);
    ierr = MatDenseGetArray(X,&bray);CHKERRQ(ierr);
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(X,&bray);CHKERRQ(ierr);

    if (!mumps->myid) {
      ierr = MatSeqAIJGetArray(b->A,&aa);CHKERRQ(ierr);
      ierr = MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      /* MUMPS requires ia and ja to start at 1! */
      mumps->id.irhs_ptr    = ia;
      mumps->id.irhs_sparse = ja;
      mumps->id.nz_rhs      = ia[spnr] - 1;
      mumps->id.rhs_sparse  = (MumpsScalar*)aa;
    } else {
      mumps->id.irhs_ptr    = NULL;
      mumps->id.irhs_sparse = NULL;
      mumps->id.nz_rhs      = 0;
      mumps->id.rhs_sparse  = NULL;
    }
  }

  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

  /* scatter the MUMPS distributed solution to the PETSc vector v_mpi, which shares local arrays with the solution matrix X */
  ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
  ierr = VecPlaceArray(v_mpi,array);CHKERRQ(ierr);

  /* create scatter scat_sol */
  ierr = MatGetOwnershipRanges(X,&rstart);CHKERRQ(ierr);
  /* iidx: index for scattering the MUMPS solution to PETSc X */

  ierr = ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);CHKERRQ(ierr);
  ierr = PetscMalloc1(nlsol_loc,&idxx);CHKERRQ(ierr);
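  /*
     Worked example (illustrative) of the mapping built below: suppose 2 ranks own rows {0,1}
     and {2,3} of X (rstart = {0,2,4}) and nrhs = 2. v_mpi concatenates each rank's local
     column-major block of X, so the entry of global row 3 in column j lives at PETSc index
     (3 - 2) + 2*nrhs + j*m = 5 + 2j, with m = 2 rows owned by rank 1. That is exactly
     iidx + j*m as computed in the loop.
  */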
  for (i=0; i<lsol_loc; i++) {
    isol_loc[i] -= 1; /* change Fortran style to C style; sol_loc[i+j*lsol_loc] contains the value of x[isol_loc[i]] in the j-th column */

    for (proc=0; proc<mumps->petsc_size; proc++){
      if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) {
        myrstart = rstart[proc];
        k        = isol_loc[i] - myrstart;        /* local index on the 1st column of the PETSc vector X */
        iidx     = k + myrstart*nrhs;             /* maps MUMPS isol_loc[i] to the PETSc index in X */
        m        = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */
        break;
      }
    }

    for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m;
  }
  ierr = ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr);
  ierr = VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);CHKERRQ(ierr);
  ierr = VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = ISDestroy(&is_from);CHKERRQ(ierr);
  ierr = ISDestroy(&is_to);CHKERRQ(ierr);
  ierr = VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);

  /* free spaces */
  mumps->id.sol_loc  = (MumpsScalar*)sol_loc_save;
  mumps->id.isol_loc = isol_loc_save;

  ierr = PetscFree2(sol_loc,isol_loc);CHKERRQ(ierr);
  ierr = PetscFree(idxx);CHKERRQ(ierr);
  ierr = VecDestroy(&msol_loc);CHKERRQ(ierr);
  ierr = VecDestroy(&v_mpi);CHKERRQ(ierr);
  if (Bt) {
    if (!mumps->myid) {
      b = (Mat_MPIAIJ*)Bt->data;
      ierr = MatSeqAIJRestoreArray(b->A,&aa);CHKERRQ(ierr);
      ierr = MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
  } else {
    ierr = VecDestroy(&b_seq);CHKERRQ(ierr);
    ierr = VecScatterDestroy(&scat_rhs);CHKERRQ(ierr);
  }
  ierr = VecScatterDestroy(&scat_sol);CHKERRQ(ierr);
  ierr = PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X)
{
  PetscErrorCode ierr;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix");

  /* Create B=Bt^T that uses Bt's data structure */
  ierr = MatCreateTranspose(Bt,&B);CHKERRQ(ierr);

  ierr = MatMatSolve_MUMPS(A,B,X);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
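/*
   Illustrative usage of the inertia routine below (real builds only), assuming the factor F
   was computed with -mat_mumps_icntl_13 1 (and -mat_mumps_icntl_24 1 if zero pivots must be
   detected):

     PetscInt nneg,nzero,npos;
     MatGetInertia(F,&nneg,&nzero,&npos);

   which is what, e.g., eigenvalue spectrum-slicing algorithms consume.
*/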
#if !defined(PETSC_USE_COMPLEX)
/*
  input:
   F:     numeric factor
  output:
   nneg:  total number of negative pivots
   nzero: total number of zero pivots
   npos:  (global dimension of F) - nneg - nzero
*/
PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos)
{
  Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRQ(ierr);
  /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
  if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",mumps->id.ICNTL(13));

  if (nneg) *nneg = mumps->id.INFOG(12);
  if (nzero || npos) {
    if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
    if (nzero) *nzero = mumps->id.INFOG(28);
    if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
  }
  PetscFunctionReturn(0);
}
#endif

PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps)
{
  PetscErrorCode ierr;
  PetscInt       i,nz=0,*irn=NULL,*jcn=NULL;
  PetscScalar    *val=NULL;
  PetscMPIInt    mpinz,*recvcount=NULL,*displs=NULL;

  PetscFunctionBegin;
  if (mumps->omp_comm_size > 1) {
    if (reuse == MAT_INITIAL_MATRIX) {
      /* the master first gathers counts of nonzeros to receive */
      if (mumps->is_omp_master) { ierr = PetscMalloc2(mumps->omp_comm_size,&recvcount,mumps->omp_comm_size,&displs);CHKERRQ(ierr); }
      ierr = PetscMPIIntCast(mumps->nz,&mpinz);CHKERRQ(ierr);
      ierr = MPI_Gather(&mpinz,1,MPI_INT,recvcount,1,MPI_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr);

      /* the master allocates memory to receive the nonzeros */
      if (mumps->is_omp_master) {
        displs[0] = 0;
        for (i=1; i<mumps->omp_comm_size; i++) displs[i] = displs[i-1] + recvcount[i-1];
        nz   = displs[mumps->omp_comm_size-1] + recvcount[mumps->omp_comm_size-1];
        ierr = PetscMalloc(2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar),&irn);CHKERRQ(ierr);
        jcn  = irn + nz;
        val  = (PetscScalar*)(jcn + nz);
      }

      /* save the gatherv plan */
      mumps->mpinz     = mpinz; /* used as the send count */
      mumps->recvcount = recvcount;
      mumps->displs    = displs;

      /* the master gathers the nonzeros */
      ierr = MPI_Gatherv(mumps->irn,mpinz,MPIU_INT,irn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr);
      ierr = MPI_Gatherv(mumps->jcn,mpinz,MPIU_INT,jcn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr);
      ierr = MPI_Gatherv(mumps->val,mpinz,MPIU_SCALAR,val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr);

      /* the master frees its row/col/val and replaces them with the bigger arrays */
      if (mumps->is_omp_master) {
        ierr = PetscFree(mumps->irn);CHKERRQ(ierr); /* irn/jcn/val are allocated together so free only irn */
        mumps->nz  = nz; /* it is a sum of mpinz over omp_comm */
        mumps->irn = irn;
        mumps->jcn = jcn;
        mumps->val = val;
      }
    } else {
      ierr = MPI_Gatherv((mumps->is_omp_master?MPI_IN_PLACE:mumps->val),mumps->mpinz,MPIU_SCALAR,mumps->val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}
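/*
   Illustrative example of the gather plan above: with an omp_comm of size 3 whose ranks hold
   mpinz = {4,2,3} local triples, the master computes recvcount = {4,2,3}, displs = {0,4,6}
   and allocates nz = 9 slots; subsequent MAT_REUSE_MATRIX calls reuse this plan and only
   re-gather the numerical values.
*/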
PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat_MUMPS      *mumps =(Mat_MUMPS*)(F)->data;
  PetscErrorCode ierr;
  PetscBool      isMPIAIJ;

  PetscFunctionBegin;
  if (mumps->id.INFOG(1) < 0) {
    if (mumps->id.INFOG(1) == -6) {
      ierr = PetscInfo2(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    }
    ierr = PetscInfo2(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr);
  ierr = MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);CHKERRQ(ierr);

  /* numerical factorization phase */
  /*-------------------------------*/
  mumps->id.job = JOB_FACTNUMERIC;
  if (!mumps->id.ICNTL(18)) { /* A is centralized */
    if (!mumps->myid) {
      mumps->id.a = (MumpsScalar*)mumps->val;
    }
  } else {
    mumps->id.a_loc = (MumpsScalar*)mumps->val;
  }
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) {
    if (A->erroriffailure) {
      SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
    } else {
      if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
        ierr = PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
      } else if (mumps->id.INFOG(1) == -13) {
        ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_OUTMEMORY;
      } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10) ) {
        ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_OUTMEMORY;
      } else {
        ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_OTHER;
      }
    }
  }
  if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d\n",mumps->id.ICNTL(16));

  F->assembled    = PETSC_TRUE;
  mumps->matstruc = SAME_NONZERO_PATTERN;
  if (F->schur) { /* reset Schur status to unfactored */
#if defined(PETSC_HAVE_CUDA)
    F->schur->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
#endif
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
      mumps->id.ICNTL(19) = 2;
      ierr = MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);CHKERRQ(ierr);
    }
    ierr = MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);CHKERRQ(ierr);
  }

  /* just to be sure that the ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
  if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;

  if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
  if (mumps->petsc_size > 1) {
    PetscInt    lsol_loc;
    PetscScalar *sol_loc;

    ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr);

    /* distributed solution; create x_seq=sol_loc for repeated use */
    if (mumps->x_seq) {
      ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
      ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr);
      ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr);
    }
    lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
    ierr = PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);CHKERRQ(ierr);
    mumps->id.lsol_loc = lsol_loc;
    mumps->id.sol_loc  = (MumpsScalar*)sol_loc;
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr);
  }
  ierr = PetscLogFlops(mumps->id.RINFO(2));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
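/*
   Practical note (from the MUMPS error codes): a numerical factorization failing with
   INFOG(1) = -9 ran out of workspace; rerunning with a larger -mat_mumps_icntl_14
   (percentage workspace increase) or an explicit -mat_mumps_icntl_23 memory limit usually
   resolves it.
*/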
/* Sets MUMPS options from the options database */
PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A)
{
  Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
  PetscErrorCode ierr;
  PetscInt       icntl,info[80],i,ninfo=80;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr);
  if (flg) mumps->id.ICNTL(1) = icntl;
  ierr = PetscOptionsInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warnings","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr);
  if (flg) mumps->id.ICNTL(2) = icntl;
  ierr = PetscOptionsInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr);
  if (flg) mumps->id.ICNTL(3) = icntl;

  ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr);
  if (flg) mumps->id.ICNTL(4) = icntl;
  if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* restore the MUMPS default id.ICNTL(3) = 6 */

  ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr);
  if (flg) mumps->id.ICNTL(6) = icntl;

  ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr);
  if (flg) {
    if (icntl== 1 && mumps->petsc_size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(7)=1 (pivot order set by the user in PERM_IN) is not supported in parallel by the PETSc/MUMPS interface\n");
    else mumps->id.ICNTL(7) = icntl;
  }
  ierr = PetscOptionsInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr);
  /* ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL);CHKERRQ(ierr);  handled by MatSolveTranspose_MUMPS() */
  ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr);
  if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any Schur data (if any) */
    ierr = MatDestroy(&F->schur);CHKERRQ(ierr);
    ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr);
  }
  /* ierr = PetscOptionsInt("-mat_mumps_icntl_20","ICNTL(20): the format (dense or sparse) of the right-hand sides","None",mumps->id.ICNTL(20),&mumps->id.ICNTL(20),NULL);CHKERRQ(ierr);  -- sparse rhs is not supported in PETSc API */
  /* ierr = PetscOptionsInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL);CHKERRQ(ierr);  we only use distributed solution vector */

  ierr = PetscOptionsInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr);
  if (mumps->id.ICNTL(24)) {
    mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */
  }

  ierr = PetscOptionsInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_27","ICNTL(27): the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr);
  /* ierr = PetscOptionsInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */
  ierr = PetscOptionsInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr);
  /* ierr = PetscOptionsInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr);  -- not supported by PETSc API */
  ierr = PetscOptionsInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);CHKERRQ(ierr);

  ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);CHKERRQ(ierr);

  ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, 256, NULL);CHKERRQ(ierr);

  ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr);
  if (ninfo) {
    if (ninfo > 80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO entries %d must be <= 80\n",ninfo);
    ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr);
    mumps->ninfo = ninfo;
    for (i=0; i<ninfo; i++) {
      if (info[i] < 0 || info[i]>80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %d must be between 1 and 80\n",info[i]);
      else mumps->info[i] = info[i];
    }
  }

  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
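/*
   Illustrative invocation exercising the options above (the flag values are examples only):

     mpiexec -n 4 ./app -pc_type cholesky -pc_factor_mat_solver_type mumps \
       -mat_mumps_icntl_24 1 -mat_mumps_cntl_3 1e-12 -mat_mumps_view_info 1,12
*/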
SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %d must between 1 and 80\n",ninfo); 1419 else mumps->info[i] = info[i]; 1420 } 1421 } 1422 1423 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1424 PetscFunctionReturn(0); 1425 } 1426 1427 PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps) 1428 { 1429 PetscErrorCode ierr; 1430 PetscInt nthreads=0; 1431 1432 PetscFunctionBegin; 1433 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1434 ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRQ(ierr); 1435 ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRQ(ierr); /* so that code like "if (!myid)" still works even if mumps_comm is different */ 1436 1437 ierr = PetscOptionsHasName(NULL,NULL,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr); 1438 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1439 ierr = PetscOptionsGetInt(NULL,NULL,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr); 1440 if (mumps->use_petsc_omp_support) { 1441 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1442 ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr); 1443 ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr); 1444 #else 1445 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -mat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual\n"); 1446 #endif 1447 } else { 1448 mumps->omp_comm = PETSC_COMM_SELF; 1449 mumps->mumps_comm = mumps->petsc_comm; 1450 mumps->is_omp_master = PETSC_TRUE; 1451 } 1452 ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRQ(ierr); 1453 1454 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 1455 mumps->id.job = JOB_INIT; 1456 mumps->id.par = 1; /* host participates factorizaton and solve */ 1457 mumps->id.sym = mumps->sym; 1458 1459 PetscMUMPS_c(mumps); 1460 1461 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 1462 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 
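Note: the counts 60 and 15 in the broadcasts below are assumed to match the sizes of the icntl[] and cntl[] arrays in the MUMPS-5.x C interface struct.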
1463 */ 1464 ierr = MPI_Bcast(mumps->id.icntl,60,MPIU_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 Manual Section 9 */ 1465 ierr = MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); 1466 1467 mumps->scat_rhs = NULL; 1468 mumps->scat_sol = NULL; 1469 1470 /* set PETSc-MUMPS default options - override MUMPS default */ 1471 mumps->id.ICNTL(3) = 0; 1472 mumps->id.ICNTL(4) = 0; 1473 if (mumps->petsc_size == 1) { 1474 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 1475 } else { 1476 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 1477 mumps->id.ICNTL(20) = 0; /* rhs is in dense format */ 1478 mumps->id.ICNTL(21) = 1; /* distributed solution */ 1479 } 1480 1481 /* schur */ 1482 mumps->id.size_schur = 0; 1483 mumps->id.listvar_schur = NULL; 1484 mumps->id.schur = NULL; 1485 mumps->sizeredrhs = 0; 1486 mumps->schur_sol = NULL; 1487 mumps->schur_sizesol = 0; 1488 PetscFunctionReturn(0); 1489 } 1490 1491 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps) 1492 { 1493 PetscErrorCode ierr; 1494 1495 PetscFunctionBegin; 1496 ierr = MPI_Bcast(mumps->id.infog, 80,MPIU_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 manual p82 */ 1497 ierr = MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); 1498 if (mumps->id.INFOG(1) < 0) { 1499 if (A->erroriffailure) { 1500 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1501 } else { 1502 if (mumps->id.INFOG(1) == -6) { 1503 ierr = PetscInfo2(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1504 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 1505 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 1506 ierr = PetscInfo2(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1507 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1508 } else { 1509 ierr = PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1510 F->factorerrortype = MAT_FACTOR_OTHER; 1511 } 1512 } 1513 } 1514 PetscFunctionReturn(0); 1515 } 1516 1517 /* Note Petsc r(=c) permutation is used when mumps->id.ICNTL(7)==1 with centralized assembled matrix input; otherwise r and c are ignored */ 1518 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1519 { 1520 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1521 PetscErrorCode ierr; 1522 Vec b; 1523 const PetscInt M = A->rmap->N; 1524 1525 PetscFunctionBegin; 1526 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1527 1528 /* Set MUMPS options from the options database */ 1529 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1530 1531 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1532 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1533 1534 /* analysis phase */ 1535 /*----------------*/ 1536 mumps->id.job = JOB_FACTSYMBOLIC; 1537 mumps->id.n = M; 1538 switch (mumps->id.ICNTL(18)) { 1539 case 0: /* centralized assembled matrix input */ 1540 if (!mumps->myid) { 1541 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1542 if (mumps->id.ICNTL(6)>1) { 1543 mumps->id.a = (MumpsScalar*)mumps->val; 1544 } 1545 if (mumps->id.ICNTL(7) == 1) { /* use user-provide 
matrix ordering - assuming r = c ordering */ 1546 /* 1547 PetscBool flag; 1548 ierr = ISEqual(r,c,&flag);CHKERRQ(ierr); 1549 if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"row_perm != col_perm"); 1550 ierr = ISView(r,PETSC_VIEWER_STDOUT_SELF); 1551 */ 1552 if (!mumps->myid) { 1553 const PetscInt *idx; 1554 PetscInt i,*perm_in; 1555 1556 ierr = PetscMalloc1(M,&perm_in);CHKERRQ(ierr); 1557 ierr = ISGetIndices(r,&idx);CHKERRQ(ierr); 1558 1559 mumps->id.perm_in = perm_in; 1560 for (i=0; i<M; i++) perm_in[i] = idx[i]+1; /* perm_in[]: start from 1, not 0! */ 1561 ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr); 1562 } 1563 } 1564 } 1565 break; 1566 case 3: /* distributed assembled matrix input (size>1) */ 1567 mumps->id.nz_loc = mumps->nz; 1568 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1569 if (mumps->id.ICNTL(6)>1) { 1570 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1571 } 1572 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1573 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1574 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1575 ierr = VecDestroy(&b);CHKERRQ(ierr); 1576 break; 1577 } 1578 PetscMUMPS_c(mumps); 1579 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1580 1581 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1582 F->ops->solve = MatSolve_MUMPS; 1583 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1584 F->ops->matsolve = MatMatSolve_MUMPS; 1585 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1586 PetscFunctionReturn(0); 1587 } 1588 1589 /* Note the Petsc r and c permutations are ignored */ 1590 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1591 { 1592 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1593 PetscErrorCode ierr; 1594 Vec b; 1595 const PetscInt M = A->rmap->N; 1596 1597 PetscFunctionBegin; 1598 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1599 1600 /* Set MUMPS options from the options database */ 1601 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1602 1603 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1604 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1605 1606 /* analysis phase */ 1607 /*----------------*/ 1608 mumps->id.job = JOB_FACTSYMBOLIC; 1609 mumps->id.n = M; 1610 switch (mumps->id.ICNTL(18)) { 1611 case 0: /* centralized assembled matrix input */ 1612 if (!mumps->myid) { 1613 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1614 if (mumps->id.ICNTL(6)>1) { 1615 mumps->id.a = (MumpsScalar*)mumps->val; 1616 } 1617 } 1618 break; 1619 case 3: /* distributed assembled matrix input (size>1) */ 1620 mumps->id.nz_loc = mumps->nz; 1621 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1622 if (mumps->id.ICNTL(6)>1) { 1623 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1624 } 1625 /* MUMPS only supports centralized rhs. 
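The distributed PETSc rhs is gathered into the sequential vector b_seq on the host at solve time.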
Create scatter scat_rhs for repeated use in MatSolve() */ 1626 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1627 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1628 ierr = VecDestroy(&b);CHKERRQ(ierr); 1629 break; 1630 } 1631 PetscMUMPS_c(mumps); 1632 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1633 1634 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1635 F->ops->solve = MatSolve_MUMPS; 1636 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1637 PetscFunctionReturn(0); 1638 } 1639 1640 /* Note the Petsc r permutation and factor info are ignored */ 1641 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info) 1642 { 1643 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1644 PetscErrorCode ierr; 1645 Vec b; 1646 const PetscInt M = A->rmap->N; 1647 1648 PetscFunctionBegin; 1649 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1650 1651 /* Set MUMPS options from the options database */ 1652 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1653 1654 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1655 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1656 1657 /* analysis phase */ 1658 /*----------------*/ 1659 mumps->id.job = JOB_FACTSYMBOLIC; 1660 mumps->id.n = M; 1661 switch (mumps->id.ICNTL(18)) { 1662 case 0: /* centralized assembled matrix input */ 1663 if (!mumps->myid) { 1664 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1665 if (mumps->id.ICNTL(6)>1) { 1666 mumps->id.a = (MumpsScalar*)mumps->val; 1667 } 1668 } 1669 break; 1670 case 3: /* distributed assembled matrix input (size>1) */ 1671 mumps->id.nz_loc = mumps->nz; 1672 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1673 if (mumps->id.ICNTL(6)>1) { 1674 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1675 } 1676 /* MUMPS only supports centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 1677 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1678 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1679 ierr = VecDestroy(&b);CHKERRQ(ierr); 1680 break; 1681 } 1682 PetscMUMPS_c(mumps); 1683 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1684 1685 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 1686 F->ops->solve = MatSolve_MUMPS; 1687 F->ops->solvetranspose = MatSolve_MUMPS; 1688 F->ops->matsolve = MatMatSolve_MUMPS; 1689 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1690 #if defined(PETSC_USE_COMPLEX) 1691 F->ops->getinertia = NULL; 1692 #else 1693 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 1694 #endif 1695 PetscFunctionReturn(0); 1696 } 1697 1698 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) 1699 { 1700 PetscErrorCode ierr; 1701 PetscBool iascii; 1702 PetscViewerFormat format; 1703 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1704 1705 PetscFunctionBegin; 1706 /* check if matrix is mumps type */ 1707 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0); 1708 1709 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1710 if (iascii) { 1711 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1712 if (format == PETSC_VIEWER_ASCII_INFO) { 1713 ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr); 1714 ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",mumps->id.sym);CHKERRQ(ierr); 1715 ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",mumps->id.par);CHKERRQ(ierr); 1716 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d \n",mumps->id.ICNTL(1));CHKERRQ(ierr); 1717 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d \n",mumps->id.ICNTL(2));CHKERRQ(ierr); 1718 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d \n",mumps->id.ICNTL(3));CHKERRQ(ierr); 1719 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",mumps->id.ICNTL(4));CHKERRQ(ierr); 1720 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",mumps->id.ICNTL(5));CHKERRQ(ierr); 1721 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",mumps->id.ICNTL(6));CHKERRQ(ierr); 1722 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d \n",mumps->id.ICNTL(7));CHKERRQ(ierr); 1723 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d \n",mumps->id.ICNTL(8));CHKERRQ(ierr); 1724 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",mumps->id.ICNTL(10));CHKERRQ(ierr); 1725 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",mumps->id.ICNTL(11));CHKERRQ(ierr); 1726 if (mumps->id.ICNTL(11)>0) { 1727 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr); 1728 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr); 1729 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6));CHKERRQ(ierr); 1730 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr); 1731 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr); 1732 ierr = PetscViewerASCIIPrintf(viewer," 
RINFOG(10),RINFOG(11) (condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr); 1733 } 1734 ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(12) (efficiency control): %d \n",mumps->id.ICNTL(12));CHKERRQ(ierr); 1735 ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(13) (efficiency control): %d \n",mumps->id.ICNTL(13));CHKERRQ(ierr); 1736 ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(14) (percentage of estimated workspace increase): %d \n",mumps->id.ICNTL(14));CHKERRQ(ierr); 1737 /* ICNTL(15-17) not used */ 1738 ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(18) (input mat struct): %d \n",mumps->id.ICNTL(18));CHKERRQ(ierr); 1739 ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(19) (Schur complement info): %d \n",mumps->id.ICNTL(19));CHKERRQ(ierr); 1740 ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(20) (rhs sparse pattern): %d \n",mumps->id.ICNTL(20));CHKERRQ(ierr); 1741 ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(21) (solution struct): %d \n",mumps->id.ICNTL(21));CHKERRQ(ierr); 1742 ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(22) (in-core/out-of-core facility): %d \n",mumps->id.ICNTL(22));CHKERRQ(ierr); 1743 ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(23) (max size of memory that can be allocated locally): %d \n",mumps->id.ICNTL(23));CHKERRQ(ierr); 1744 1745 ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(24) (detection of null pivot rows): %d \n",mumps->id.ICNTL(24));CHKERRQ(ierr); 1746 ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(25) (computation of a null space basis): %d \n",mumps->id.ICNTL(25));CHKERRQ(ierr); 1747 ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(26) (Schur options for rhs or solution): %d \n",mumps->id.ICNTL(26));CHKERRQ(ierr); 1748 ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(27) (blocking size for multiple right-hand sides): %d \n",mumps->id.ICNTL(27));CHKERRQ(ierr); 1749 ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(28) (use parallel or sequential ordering): %d \n",mumps->id.ICNTL(28));CHKERRQ(ierr); 1750 ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(29) (parallel ordering): %d \n",mumps->id.ICNTL(29));CHKERRQ(ierr); 1751 1752 ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(30) (user-specified set of entries in inv(A)): %d \n",mumps->id.ICNTL(30));CHKERRQ(ierr); 1753 ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(31) (factors discarded in the solve phase): %d \n",mumps->id.ICNTL(31));CHKERRQ(ierr); 1754 ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(33) (compute determinant): %d \n",mumps->id.ICNTL(33));CHKERRQ(ierr); 1755 ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(35) (activate BLR based factorization): %d \n",mumps->id.ICNTL(35));CHKERRQ(ierr); 1756 ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(36) (choice of BLR factorization variant): %d \n",mumps->id.ICNTL(36));CHKERRQ(ierr); 1757 ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(38) (estimated compression rate of LU factors): %d \n",mumps->id.ICNTL(38));CHKERRQ(ierr); 1758 1759 ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1));CHKERRQ(ierr); 1760 ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr); 1761 ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3));CHKERRQ(ierr); 1762 ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4));CHKERRQ(ierr); 1763 ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5));CHKERRQ(ierr); 1764 ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(7)
(dropping parameter for BLR): %g \n",mumps->id.CNTL(7));CHKERRQ(ierr); 1765 1766 /* information local to each processor */ 1767 ierr = PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr); 1768 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1769 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %g \n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr); 1770 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1771 ierr = PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr); 1772 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr); 1773 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1774 ierr = PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr); 1775 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr); 1776 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1777 1778 ierr = PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size (in MB) of MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr); 1779 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d \n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr); 1780 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1781 1782 ierr = PetscViewerASCIIPrintf(viewer, "  INFO(16) (size (in MB) of MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr); 1783 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d \n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr); 1784 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1785 1786 ierr = PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr); 1787 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d \n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr); 1788 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1789 1790 if (mumps->ninfo && mumps->ninfo <= 80){ 1791 PetscInt i; 1792 for (i=0; i<mumps->ninfo; i++){ 1793 ierr = PetscViewerASCIIPrintf(viewer, "  INFO(%d): \n",mumps->info[i]);CHKERRQ(ierr); 1794 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d \n",mumps->myid,mumps->id.INFO(mumps->info[i]));CHKERRQ(ierr); 1795 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1796 } 1797 } 1798 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1799 1800 if (!mumps->myid) { /* information from the host */ 1801 ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr); 1802 ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr); 1803 ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr); 1804 ierr = PetscViewerASCIIPrintf(viewer,"  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr); 1805 1806 ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(3));CHKERRQ(ierr); 1807 ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(4) (estimated integer workspace for factors on all processors after analysis):
%d \n",mumps->id.INFOG(4));CHKERRQ(ierr); 1808 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",mumps->id.INFOG(5));CHKERRQ(ierr); 1809 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",mumps->id.INFOG(6));CHKERRQ(ierr); 1810 ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively use after analysis): %d \n",mumps->id.INFOG(7));CHKERRQ(ierr); 1811 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",mumps->id.INFOG(8));CHKERRQ(ierr); 1812 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",mumps->id.INFOG(9));CHKERRQ(ierr); 1813 ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d \n",mumps->id.INFOG(10));CHKERRQ(ierr); 1814 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d \n",mumps->id.INFOG(11));CHKERRQ(ierr); 1815 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",mumps->id.INFOG(12));CHKERRQ(ierr); 1816 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",mumps->id.INFOG(13));CHKERRQ(ierr); 1817 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",mumps->id.INFOG(14));CHKERRQ(ierr); 1818 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",mumps->id.INFOG(15));CHKERRQ(ierr); 1819 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",mumps->id.INFOG(16));CHKERRQ(ierr); 1820 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",mumps->id.INFOG(17));CHKERRQ(ierr); 1821 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",mumps->id.INFOG(18));CHKERRQ(ierr); 1822 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",mumps->id.INFOG(19));CHKERRQ(ierr); 1823 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",mumps->id.INFOG(20));CHKERRQ(ierr); 1824 ierr = PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",mumps->id.INFOG(21));CHKERRQ(ierr); 1825 ierr = PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",mumps->id.INFOG(22));CHKERRQ(ierr); 1826 ierr = PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",mumps->id.INFOG(23));CHKERRQ(ierr); 1827 ierr = PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",mumps->id.INFOG(24));CHKERRQ(ierr); 1828 ierr = PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",mumps->id.INFOG(25));CHKERRQ(ierr); 1829 ierr = 
PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));CHKERRQ(ierr); 1830 ierr = PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));CHKERRQ(ierr); 1831 ierr = PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));CHKERRQ(ierr); 1832 ierr = PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));CHKERRQ(ierr); 1833 ierr = PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));CHKERRQ(ierr); 1834 ierr = PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));CHKERRQ(ierr); 1835 ierr = PetscViewerASCIIPrintf(viewer," INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));CHKERRQ(ierr); 1836 ierr = PetscViewerASCIIPrintf(viewer," INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(36));CHKERRQ(ierr); 1837 ierr = PetscViewerASCIIPrintf(viewer," INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d \n",mumps->id.INFOG(37));CHKERRQ(ierr); 1838 ierr = PetscViewerASCIIPrintf(viewer," INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(38));CHKERRQ(ierr); 1839 ierr = PetscViewerASCIIPrintf(viewer," INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d \n",mumps->id.INFOG(39));CHKERRQ(ierr); 1840 } 1841 } 1842 } 1843 PetscFunctionReturn(0); 1844 } 1845 1846 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info) 1847 { 1848 Mat_MUMPS *mumps =(Mat_MUMPS*)A->data; 1849 1850 PetscFunctionBegin; 1851 info->block_size = 1.0; 1852 info->nz_allocated = mumps->id.INFOG(20); 1853 info->nz_used = mumps->id.INFOG(20); 1854 info->nz_unneeded = 0.0; 1855 info->assemblies = 0.0; 1856 info->mallocs = 0.0; 1857 info->memory = 0.0; 1858 info->fill_ratio_given = 0; 1859 info->fill_ratio_needed = 0; 1860 info->factor_mallocs = 0; 1861 PetscFunctionReturn(0); 1862 } 1863 1864 /* -------------------------------------------------------------------------------------------*/ 1865 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 1866 { 1867 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1868 const PetscInt *idxs; 1869 PetscInt size,i; 1870 PetscErrorCode ierr; 1871 1872 PetscFunctionBegin; 1873 ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr); 1874 if (mumps->petsc_size > 1) { 1875 PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */ 1876 1877 ls = mumps->myid ? (size ? 
PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 1878 ierr = MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);CHKERRQ(ierr); 1879 if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported by PETSc\n"); 1880 } 1881 if (mumps->id.size_schur != size) { 1882 ierr = PetscFree2(mumps->id.listvar_schur,mumps->id.schur);CHKERRQ(ierr); 1883 mumps->id.size_schur = size; 1884 mumps->id.schur_lld = size; 1885 ierr = PetscMalloc2(size,&mumps->id.listvar_schur,size*size,&mumps->id.schur);CHKERRQ(ierr); 1886 } 1887 1888 /* Schur complement matrix */ 1889 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.size_schur,(PetscScalar*)mumps->id.schur,&F->schur);CHKERRQ(ierr); 1890 if (mumps->sym == 1) { 1891 ierr = MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 1892 } 1893 1894 /* MUMPS expects Fortran style indices */ 1895 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 1896 ierr = PetscArraycpy(mumps->id.listvar_schur,idxs,size);CHKERRQ(ierr); 1897 for (i=0;i<size;i++) mumps->id.listvar_schur[i]++; 1898 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 1899 if (mumps->petsc_size > 1) { 1900 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 1901 } else { 1902 if (F->factortype == MAT_FACTOR_LU) { 1903 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 1904 } else { 1905 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 1906 } 1907 } 1908 /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */ 1909 mumps->id.ICNTL(26) = -1; 1910 PetscFunctionReturn(0); 1911 } 1912 1913 /* -------------------------------------------------------------------------------------------*/ 1914 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S) 1915 { 1916 Mat St; 1917 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1918 PetscScalar *array; 1919 #if defined(PETSC_USE_COMPLEX) 1920 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 1921 #endif 1922 PetscErrorCode ierr; 1923 1924 PetscFunctionBegin; 1925 if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected!
You should call MatFactorSetSchurIS to enable it"); 1926 ierr = MatCreate(PETSC_COMM_SELF,&St);CHKERRQ(ierr); 1927 ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);CHKERRQ(ierr); 1928 ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr); 1929 ierr = MatSetUp(St);CHKERRQ(ierr); 1930 ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr); 1931 if (!mumps->sym) { /* MUMPS always return a full matrix */ 1932 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1933 PetscInt i,j,N=mumps->id.size_schur; 1934 for (i=0;i<N;i++) { 1935 for (j=0;j<N;j++) { 1936 #if !defined(PETSC_USE_COMPLEX) 1937 PetscScalar val = mumps->id.schur[i*N+j]; 1938 #else 1939 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 1940 #endif 1941 array[j*N+i] = val; 1942 } 1943 } 1944 } else { /* stored by columns */ 1945 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 1946 } 1947 } else { /* either full or lower-triangular (not packed) */ 1948 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 1949 PetscInt i,j,N=mumps->id.size_schur; 1950 for (i=0;i<N;i++) { 1951 for (j=i;j<N;j++) { 1952 #if !defined(PETSC_USE_COMPLEX) 1953 PetscScalar val = mumps->id.schur[i*N+j]; 1954 #else 1955 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 1956 #endif 1957 array[i*N+j] = val; 1958 array[j*N+i] = val; 1959 } 1960 } 1961 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 1962 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 1963 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 1964 PetscInt i,j,N=mumps->id.size_schur; 1965 for (i=0;i<N;i++) { 1966 for (j=0;j<i+1;j++) { 1967 #if !defined(PETSC_USE_COMPLEX) 1968 PetscScalar val = mumps->id.schur[i*N+j]; 1969 #else 1970 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 1971 #endif 1972 array[i*N+j] = val; 1973 array[j*N+i] = val; 1974 } 1975 } 1976 } 1977 } 1978 ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr); 1979 *S = St; 1980 PetscFunctionReturn(0); 1981 } 1982 1983 /* -------------------------------------------------------------------------------------------*/ 1984 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival) 1985 { 1986 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1987 1988 PetscFunctionBegin; 1989 mumps->id.ICNTL(icntl) = ival; 1990 PetscFunctionReturn(0); 1991 } 1992 1993 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival) 1994 { 1995 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1996 1997 PetscFunctionBegin; 1998 *ival = mumps->id.ICNTL(icntl); 1999 PetscFunctionReturn(0); 2000 } 2001 2002 /*@ 2003 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2004 2005 Logically Collective on Mat 2006 2007 Input Parameters: 2008 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2009 . icntl - index of MUMPS parameter array ICNTL() 2010 - ival - value of MUMPS ICNTL(icntl) 2011 2012 Options Database: 2013 . -mat_mumps_icntl_<icntl> <ival> 2014 2015 Level: beginner 2016 2017 References: 2018 . 
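MUMPS Users' Guide

   Example usage, a minimal sketch (assumes F was obtained with MatGetFactor() and the MATSOLVERMUMPS package):
$     ierr = MatMumpsSetIcntl(F,14,30);CHKERRQ(ierr); /* raise the estimated working space increase ICNTL(14) to 30 percent */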
2019 2020 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2021 @*/ 2022 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival) 2023 { 2024 PetscErrorCode ierr; 2025 2026 PetscFunctionBegin; 2027 PetscValidType(F,1); 2028 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2029 PetscValidLogicalCollectiveInt(F,icntl,2); 2030 PetscValidLogicalCollectiveInt(F,ival,3); 2031 ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr); 2032 PetscFunctionReturn(0); 2033 } 2034 2035 /*@ 2036 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2037 2038 Logically Collective on Mat 2039 2040 Input Parameters: 2041 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2042 - icntl - index of MUMPS parameter array ICNTL() 2043 2044 Output Parameter: 2045 . ival - value of MUMPS ICNTL(icntl) 2046 2047 Level: beginner 2048 2049 References: 2050 . MUMPS Users' Guide 2051 2052 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2053 @*/ 2054 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival) 2055 { 2056 PetscErrorCode ierr; 2057 2058 PetscFunctionBegin; 2059 PetscValidType(F,1); 2060 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2061 PetscValidLogicalCollectiveInt(F,icntl,2); 2062 PetscValidIntPointer(ival,3); 2063 ierr = PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2064 PetscFunctionReturn(0); 2065 } 2066 2067 /* -------------------------------------------------------------------------------------------*/ 2068 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val) 2069 { 2070 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2071 2072 PetscFunctionBegin; 2073 mumps->id.CNTL(icntl) = val; 2074 PetscFunctionReturn(0); 2075 } 2076 2077 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val) 2078 { 2079 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2080 2081 PetscFunctionBegin; 2082 *val = mumps->id.CNTL(icntl); 2083 PetscFunctionReturn(0); 2084 } 2085 2086 /*@ 2087 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2088 2089 Logically Collective on Mat 2090 2091 Input Parameters: 2092 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2093 . icntl - index of MUMPS parameter array CNTL() 2094 - val - value of MUMPS CNTL(icntl) 2095 2096 Options Database: 2097 . -mat_mumps_cntl_<icntl> <val> 2098 2099 Level: beginner 2100 2101 References: 2102 .
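MUMPS Users' Guide

   Example usage, a minimal sketch (assumes F was obtained with MatGetFactor() and the MATSOLVERMUMPS package):
$     ierr = MatMumpsSetCntl(F,1,0.01);CHKERRQ(ierr); /* set the relative pivoting threshold CNTL(1) */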
2103 2104 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2105 @*/ 2106 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val) 2107 { 2108 PetscErrorCode ierr; 2109 2110 PetscFunctionBegin; 2111 PetscValidType(F,1); 2112 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2113 PetscValidLogicalCollectiveInt(F,icntl,2); 2114 PetscValidLogicalCollectiveReal(F,val,3); 2115 ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr); 2116 PetscFunctionReturn(0); 2117 } 2118 2119 /*@ 2120 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2121 2122 Logically Collective on Mat 2123 2124 Input Parameters: 2125 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2126 - icntl - index of MUMPS parameter array CNTL() 2127 2128 Output Parameter: 2129 . val - value of MUMPS CNTL(icntl) 2130 2131 Level: beginner 2132 2133 References: 2134 . MUMPS Users' Guide 2135 2136 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2137 @*/ 2138 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val) 2139 { 2140 PetscErrorCode ierr; 2141 2142 PetscFunctionBegin; 2143 PetscValidType(F,1); 2144 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2145 PetscValidLogicalCollectiveInt(F,icntl,2); 2146 PetscValidRealPointer(val,3); 2147 ierr = PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2148 PetscFunctionReturn(0); 2149 } 2150 2151 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info) 2152 { 2153 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2154 2155 PetscFunctionBegin; 2156 *info = mumps->id.INFO(icntl); 2157 PetscFunctionReturn(0); 2158 } 2159 2160 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog) 2161 { 2162 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2163 2164 PetscFunctionBegin; 2165 *infog = mumps->id.INFOG(icntl); 2166 PetscFunctionReturn(0); 2167 } 2168 2169 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo) 2170 { 2171 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2172 2173 PetscFunctionBegin; 2174 *rinfo = mumps->id.RINFO(icntl); 2175 PetscFunctionReturn(0); 2176 } 2177 2178 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog) 2179 { 2180 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2181 2182 PetscFunctionBegin; 2183 *rinfog = mumps->id.RINFOG(icntl); 2184 PetscFunctionReturn(0); 2185 } 2186 2187 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS) 2188 { 2189 PetscErrorCode ierr; 2190 Mat Bt = NULL,Btseq = NULL; 2191 PetscBool flg; 2192 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2193 PetscScalar *aa; 2194 PetscInt spnr,*ia,*ja; 2195 2196 PetscFunctionBegin; 2197 PetscValidHeaderSpecific(spRHS,MAT_CLASSID,2); 2198 ierr = PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);CHKERRQ(ierr); 2199 if (flg) { 2200 ierr = MatTransposeGetMat(spRHS,&Bt);CHKERRQ(ierr); 2201 } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be of type MATTRANSPOSEMAT"); 2202 2203 ierr = MatMumpsSetIcntl(F,30,1);CHKERRQ(ierr); 2204 2205 if (mumps->petsc_size > 1) { 2206 Mat_MPIAIJ *b =
(Mat_MPIAIJ*)Bt->data; 2207 Btseq = b->A; 2208 } else { 2209 Btseq = Bt; 2210 } 2211 2212 if (!mumps->myid) { 2213 ierr = MatSeqAIJGetArray(Btseq,&aa);CHKERRQ(ierr); 2214 ierr = MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2215 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2216 2217 mumps->id.irhs_ptr = ia; 2218 mumps->id.irhs_sparse = ja; 2219 mumps->id.nz_rhs = ia[spnr] - 1; 2220 mumps->id.rhs_sparse = (MumpsScalar*)aa; 2221 } else { 2222 mumps->id.irhs_ptr = NULL; 2223 mumps->id.irhs_sparse = NULL; 2224 mumps->id.nz_rhs = 0; 2225 mumps->id.rhs_sparse = NULL; 2226 } 2227 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2228 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2229 2230 /* solve phase */ 2231 /*-------------*/ 2232 mumps->id.job = JOB_SOLVE; 2233 PetscMUMPS_c(mumps); 2234 if (mumps->id.INFOG(1) < 0) 2235 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 2236 2237 if (!mumps->myid) { 2238 ierr = MatSeqAIJRestoreArray(Btseq,&aa);CHKERRQ(ierr); 2239 ierr = MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2240 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2241 } 2242 PetscFunctionReturn(0); 2243 } 2244 2245 /*@ 2246 MatMumpsGetInverse - Get user-specified set of entries in inverse of A 2247 2248 Logically Collective on Mat 2249 2250 Input Parameters: 2251 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2252 - spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0] 2253 2254 Output Parameter: 2255 . spRHS - requested entries of inverse of A 2256 2257 Level: beginner 2258 2259 References: 2260 . MUMPS Users' Guide 2261 2262 .seealso: MatGetFactor(), MatCreateTranspose() 2263 @*/ 2264 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS) 2265 { 2266 PetscErrorCode ierr; 2267 2268 PetscFunctionBegin; 2269 PetscValidType(F,1); 2270 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2271 ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr); 2272 PetscFunctionReturn(0); 2273 } 2274 2275 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST) 2276 { 2277 PetscErrorCode ierr; 2278 Mat spRHS; 2279 2280 PetscFunctionBegin; 2281 ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr); 2282 ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr); 2283 ierr = MatDestroy(&spRHS);CHKERRQ(ierr); 2284 PetscFunctionReturn(0); 2285 } 2286 2287 /*@ 2288 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T 2289 2290 Logically Collective on Mat 2291 2292 Input Parameters: 2293 + F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface 2294 - spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0] 2295 2296 Output Parameter: 2297 . spRHST - requested entries of inverse of A^T 2298 2299 Level: beginner 2300 2301 References: 2302 . 
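MUMPS Users' Guide

   Example usage, a sketch that requests the diagonal of inv(A) (M, i and spRHST are hypothetical names; the pattern is set on process 0 only):
$     ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,M,M,1,NULL,&spRHST);CHKERRQ(ierr);
$     for (i=0; i<M; i++) {ierr = MatSetValue(spRHST,i,i,0.0,INSERT_VALUES);CHKERRQ(ierr);}
$     ierr = MatAssemblyBegin(spRHST,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
$     ierr = MatAssemblyEnd(spRHST,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
$     ierr = MatMumpsGetInverseTranspose(F,spRHST);CHKERRQ(ierr); /* the requested entries overwrite the place-holder zeros */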
2303 2304 .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse() 2305 @*/ 2306 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST) 2307 { 2308 PetscErrorCode ierr; 2309 PetscBool flg; 2310 2311 PetscFunctionBegin; 2312 PetscValidType(F,1); 2313 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2314 ierr = PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 2315 if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be a MATAIJ matrix"); 2316 2317 ierr = PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));CHKERRQ(ierr); 2318 PetscFunctionReturn(0); 2319 } 2320 2321 /*@ 2322 MatMumpsGetInfo - Get MUMPS parameter INFO() 2323 2324 Logically Collective on Mat 2325 2326 Input Parameters: 2327 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2328 - icntl - index of MUMPS parameter array INFO() 2329 2330 Output Parameter: 2331 . ival - value of MUMPS INFO(icntl) 2332 2333 Level: beginner 2334 2335 References: 2336 . MUMPS Users' Guide 2337 2338 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2339 @*/ 2340 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival) 2341 { 2342 PetscErrorCode ierr; 2343 2344 PetscFunctionBegin; 2345 PetscValidType(F,1); 2346 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2347 PetscValidIntPointer(ival,3); 2348 ierr = PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2349 PetscFunctionReturn(0); 2350 } 2351 2352 /*@ 2353 MatMumpsGetInfog - Get MUMPS parameter INFOG() 2354 2355 Logically Collective on Mat 2356 2357 Input Parameters: 2358 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2359 - icntl - index of MUMPS parameter array INFOG() 2360 2361 Output Parameter: 2362 . ival - value of MUMPS INFOG(icntl) 2363 2364 Level: beginner 2365 2366 References: 2367 . MUMPS Users' Guide 2368 2369 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2370 @*/ 2371 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival) 2372 { 2373 PetscErrorCode ierr; 2374 2375 PetscFunctionBegin; 2376 PetscValidType(F,1); 2377 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2378 PetscValidIntPointer(ival,3); 2379 ierr = PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2380 PetscFunctionReturn(0); 2381 } 2382 2383 /*@ 2384 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 2385 2386 Logically Collective on Mat 2387 2388 Input Parameters: 2389 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2390 - icntl - index of MUMPS parameter array RINFO() 2391 2392 Output Parameter: 2393 . val - value of MUMPS RINFO(icntl) 2394 2395 Level: beginner 2396 2397 References: 2398 .
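MUMPS Users' Guide

   Example usage, a minimal sketch (assumes F was obtained with MatGetFactor() and the MATSOLVERMUMPS package):
$     PetscReal flops;
$     ierr = MatMumpsGetRinfo(F,3,&flops);CHKERRQ(ierr); /* local flops for the elimination after factorization */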
2399 2400 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2401 @*/ 2402 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val) 2403 { 2404 PetscErrorCode ierr; 2405 2406 PetscFunctionBegin; 2407 PetscValidType(F,1); 2408 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2409 PetscValidRealPointer(val,3); 2410 ierr = PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2411 PetscFunctionReturn(0); 2412 } 2413 2414 /*@ 2415 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 2416 2417 Logically Collective on Mat 2418 2419 Input Parameters: 2420 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2421 - icntl - index of MUMPS parameter array RINFOG() 2422 2423 Output Parameter: 2424 . val - value of MUMPS RINFOG(icntl) 2425 2426 Level: beginner 2427 2428 References: 2429 . MUMPS Users' Guide 2430 2431 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2432 @*/ 2433 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val) 2434 { 2435 PetscErrorCode ierr; 2436 2437 PetscFunctionBegin; 2438 PetscValidType(F,1); 2439 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2440 PetscValidRealPointer(val,3); 2441 ierr = PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2442 PetscFunctionReturn(0); 2443 } 2444 2445 /*MC 2446 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 2447 distributed and sequential matrices via the external package MUMPS. 2448 2449 Works with MATAIJ and MATSBAIJ matrices 2450 2451 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 2452 2453 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below. 2454 2455 Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver 2456 2457 Options Database Keys: 2458 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 2459 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warnings 2460 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 2461 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 2462 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7) 2463 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis 2464 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 2465 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 2466 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 2467 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 2468 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 2469 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 2470 .
-mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 2471 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 2472 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor 2473 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 2474 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 2475 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix 2476 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering 2477 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 2478 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 2479 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 2480 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 2481 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 2482 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 2483 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 2484 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 2485 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 2486 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 2487 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 2488 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 2489 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 2490 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 2491 Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 2492 2493 Level: beginner 2494 2495 Notes: 2496 MUMPS Cholesky does not handle (complex) Hermitian matrices (see http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf), so using it with a Hermitian matrix will generate an error. 2497 2498 When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling 2499 $ KSPGetPC(ksp,&pc); 2500 $ PCFactorGetMatrix(pc,&mat); 2501 $ MatMumpsGetInfo(mat,....); 2502 $ MatMumpsGetInfog(mat,....); etc. 2503 Or you can run with -ksp_error_if_not_converged; the program will then stop and the information will be printed in the error message. 2504 2505 There are two modes to run MUMPS/PETSc with OpenMP: 2506 2507 $ Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 2508 $ threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". 2509 2510 $ -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example, 2511 $ if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 2512 2513 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 2514 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc 2515 (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available).
Since MUMPS calls BLAS 2516 libraries, to really get performance you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 2517 (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided). 2518 2519 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 2520 processes on each compute node. Listing the processes in rank ascending order, we split the processes on a node into consecutive groups of 2521 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and the others in the omp_comm 2522 are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. We free the CPUs assigned to slaves (which might be set 2523 by CPU binding policies in job scripts) and make them available to the master so that OMP threads spawned by MUMPS can run on them. 2524 On a multi-socket compute node, MPI rank mapping matters. Continuing the above example, suppose your compute node has two sockets: 2525 if you interleave MPI ranks on the two sockets (in other words, even ranks are placed on socket 0 and odd ranks on socket 1) and bind 2526 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16 a master rank (and the threads it spawns) will use half of its cores in socket 0 and half 2527 in socket 1, which hurts locality. On the other hand, if you map MPI ranks consecutively onto the two sockets, the 2528 problem does not arise. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 2529 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 2530 examine the mapping result. 2531 2532 PETSc does not control thread binding in MUMPS. So to get the best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts, 2533 for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts, as PETSc 2534 calls omp_set_num_threads(m) internally before calling MUMPS. 2535 2536 References: 2537 + 1. - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 2538 - 2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
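   Example usage, a sketch of a typical command line (the executable name is hypothetical):
$     mpirun -n 4 ./myapp -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_24 1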
2539 2540 .seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCGetFactor(), PCFactorGetMatrix() 2541 2542 M*/ 2543 2544 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type) 2545 { 2546 PetscFunctionBegin; 2547 *type = MATSOLVERMUMPS; 2548 PetscFunctionReturn(0); 2549 } 2550 2551 /* MatGetFactor for Seq and MPI AIJ matrices */ 2552 static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F) 2553 { 2554 Mat B; 2555 PetscErrorCode ierr; 2556 Mat_MUMPS *mumps; 2557 PetscBool isSeqAIJ; 2558 2559 PetscFunctionBegin; 2560 /* Create the factorization matrix */ 2561 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr); 2562 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2563 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2564 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2565 ierr = MatSetUp(B);CHKERRQ(ierr); 2566 2567 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2568 2569 B->ops->view = MatView_MUMPS; 2570 B->ops->getinfo = MatGetInfo_MUMPS; 2571 2572 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2573 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2574 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2575 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2576 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2577 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2578 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2579 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2580 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2581 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2582 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2583 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2584 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2585 2586 if (ftype == MAT_FACTOR_LU) { 2587 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 2588 B->factortype = MAT_FACTOR_LU; 2589 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 2590 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 2591 mumps->sym = 0; 2592 } else { 2593 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 2594 B->factortype = MAT_FACTOR_CHOLESKY; 2595 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 2596 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 2597 #if defined(PETSC_USE_COMPLEX) 2598 mumps->sym = 2; 2599 #else 2600 if (A->spd_set && A->spd) mumps->sym = 1; 2601 else 
    mumps->sym = 2; /* SYM = 2: general symmetric */
#endif
  }

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}

/* MatGetFactor for Seq and MPI SBAIJ matrices */
static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqSBAIJ;

  PetscFunctionBegin;
  if (ftype != MAT_FACTOR_CHOLESKY) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Cannot use PETSc SBAIJ matrices with MUMPS LU, use AIJ matrix");
  if (A->rmap->bs > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Cannot use PETSc SBAIJ matrices with block size > 1 with MUMPS Cholesky, use AIJ matrix instead");
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr);
  /* Create the factorization matrix */
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
  if (isSeqSBAIJ) {
    mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
  } else {
    mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
  }

  B->ops->getinfo                = MatGetInfo_External;
  B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
  B->ops->view                   = MatView_MUMPS;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  B->factortype = MAT_FACTOR_CHOLESKY;
#if defined(PETSC_USE_COMPLEX)
  mumps->sym = 2;
#else
  if (A->spd_set && A->spd) mumps->sym = 1; /* SYM = 1: symmetric positive definite */
  else mumps->sym = 2;                      /* SYM = 2: general symmetric */
#endif

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}

/* MatGetFactor for Seq and MPI BAIJ matrices */
static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqBAIJ;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
    mumps->sym = 0; /* MUMPS SYM parameter: 0 = unsymmetric */
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");

  B->ops->getinfo = MatGetInfo_External;
  B->ops->view    = MatView_MUMPS;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}
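/*
   None of the MatGetFactor_*_mumps() routines in this file is called directly by users; MatGetFactor()
   dispatches to them through the registrations made in MatSolverTypeRegister_MUMPS() below. A minimal usage
   sketch (A, b, x stand for a user's assembled AIJ matrix and vectors; the NULL ordering arguments rely on
   MUMPS choosing its own ordering):

     Mat           F;
     MatFactorInfo info;
     ierr = MatFactorInfoInitialize(&info);CHKERRQ(ierr);
     ierr = MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);CHKERRQ(ierr);
     ierr = MatLUFactorSymbolic(F,A,NULL,NULL,&info);CHKERRQ(ierr);
     ierr = MatLUFactorNumeric(F,A,&info);CHKERRQ(ierr);
     ierr = MatSolve(F,b,x);CHKERRQ(ierr);
     ierr = MatDestroy(&F);CHKERRQ(ierr);
*/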
/* MatGetFactor for Seq and MPI SELL matrices */
static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqSELL;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
    else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
    mumps->sym = 0; /* MUMPS SYM parameter: 0 = unsymmetric */
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}

PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
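/*
   Once registered, the MUMPS solver is usually selected at run time rather than by calling the factory
   routines above directly. A minimal sketch using the standard PETSc KSP/PC API (ksp, pc, A, b, x are a
   user's objects):

     KSP ksp;
     PC  pc;
     ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
     ierr = KSPSetOperators(ksp,A,A);CHKERRQ(ierr);
     ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCLU);CHKERRQ(ierr);
     ierr = PCFactorSetMatSolverType(pc,MATSOLVERMUMPS);CHKERRQ(ierr);
     ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);

   or equivalently on the command line:
     -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps
*/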