1 2 /* 3 Provides an interface to the MUMPS sparse solver 4 */ 5 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 8 #include <../src/mat/impls/sell/mpi/mpisell.h> 9 10 EXTERN_C_BEGIN 11 #if defined(PETSC_USE_COMPLEX) 12 #if defined(PETSC_USE_REAL_SINGLE) 13 #include <cmumps_c.h> 14 #else 15 #include <zmumps_c.h> 16 #endif 17 #else 18 #if defined(PETSC_USE_REAL_SINGLE) 19 #include <smumps_c.h> 20 #else 21 #include <dmumps_c.h> 22 #endif 23 #endif 24 EXTERN_C_END 25 #define JOB_INIT -1 26 #define JOB_FACTSYMBOLIC 1 27 #define JOB_FACTNUMERIC 2 28 #define JOB_SOLVE 3 29 #define JOB_END -2 30 31 /* calls to MUMPS */ 32 #if defined(PETSC_USE_COMPLEX) 33 #if defined(PETSC_USE_REAL_SINGLE) 34 #define MUMPS_c cmumps_c 35 #else 36 #define MUMPS_c zmumps_c 37 #endif 38 #else 39 #if defined(PETSC_USE_REAL_SINGLE) 40 #define MUMPS_c smumps_c 41 #else 42 #define MUMPS_c dmumps_c 43 #endif 44 #endif 45 46 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */ 47 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 48 #define PetscMUMPS_c(mumps) \ 49 do { \ 50 if (mumps->use_petsc_omp_support) { \ 51 if (mumps->is_omp_master) { \ 52 ierr = PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl);CHKERRQ(ierr); \ 53 MUMPS_c(&mumps->id); \ 54 ierr = PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl);CHKERRQ(ierr); \ 55 } \ 56 ierr = PetscOmpCtrlBarrier(mumps->omp_ctrl);CHKERRQ(ierr); \ 57 } else { \ 58 MUMPS_c(&mumps->id); \ 59 } \ 60 } while(0) 61 #else 62 #define PetscMUMPS_c(mumps) \ 63 do { MUMPS_c(&mumps->id); } while (0) 64 #endif 65 66 /* declare MumpsScalar */ 67 #if defined(PETSC_USE_COMPLEX) 68 #if defined(PETSC_USE_REAL_SINGLE) 69 #define MumpsScalar mumps_complex 70 #else 71 #define MumpsScalar mumps_double_complex 72 #endif 73 #else 74 #define MumpsScalar PetscScalar 75 #endif 76 77 /* macros s.t. indices match MUMPS documentation */ 78 #define ICNTL(I) icntl[(I)-1] 79 #define CNTL(I) cntl[(I)-1] 80 #define INFOG(I) infog[(I)-1] 81 #define INFO(I) info[(I)-1] 82 #define RINFOG(I) rinfog[(I)-1] 83 #define RINFO(I) rinfo[(I)-1] 84 85 typedef struct { 86 #if defined(PETSC_USE_COMPLEX) 87 #if defined(PETSC_USE_REAL_SINGLE) 88 CMUMPS_STRUC_C id; 89 #else 90 ZMUMPS_STRUC_C id; 91 #endif 92 #else 93 #if defined(PETSC_USE_REAL_SINGLE) 94 SMUMPS_STRUC_C id; 95 #else 96 DMUMPS_STRUC_C id; 97 #endif 98 #endif 99 100 MatStructure matstruc; 101 PetscMPIInt myid,petsc_size; 102 PetscInt *irn,*jcn,nz,sym; 103 PetscScalar *val; 104 MPI_Comm mumps_comm; 105 PetscInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 106 VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 107 Vec b_seq,x_seq; 108 PetscInt ninfo,*info; /* display INFO */ 109 PetscInt sizeredrhs; 110 PetscScalar *schur_sol; 111 PetscInt schur_sizesol; 112 113 PetscBool use_petsc_omp_support; 114 PetscOmpCtrl omp_ctrl; /* an OpenMP controler that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 115 MPI_Comm petsc_comm,omp_comm; /* petsc_comm is petsc matrix's comm */ 116 PetscMPIInt mpinz; /* on master rank, nz = sum(mpinz) over omp_comm; on other ranks, mpinz = nz*/ 117 PetscMPIInt omp_comm_size; 118 PetscBool is_omp_master; /* is this rank the master of omp_comm */ 119 PetscMPIInt *recvcount,*displs; 120 121 PetscErrorCode (*ConvertToTriples)(Mat, int, MatReuse, int*, int**, int**, PetscScalar**); 122 } Mat_MUMPS; 123 124 extern PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*); 125 126 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps) 127 { 128 PetscErrorCode ierr; 129 130 PetscFunctionBegin; 131 ierr = PetscFree2(mumps->id.listvar_schur,mumps->id.schur);CHKERRQ(ierr); 132 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 133 ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr); 134 mumps->id.size_schur = 0; 135 mumps->id.schur_lld = 0; 136 mumps->id.ICNTL(19) = 0; 137 PetscFunctionReturn(0); 138 } 139 140 /* solve with rhs in mumps->id.redrhs and return in the same location */ 141 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 142 { 143 Mat_MUMPS *mumps=(Mat_MUMPS*)F->data; 144 Mat S,B,X; 145 MatFactorSchurStatus schurstatus; 146 PetscInt sizesol; 147 PetscErrorCode ierr; 148 149 PetscFunctionBegin; 150 ierr = MatFactorFactorizeSchurComplement(F);CHKERRQ(ierr); 151 ierr = MatFactorGetSchurComplement(F,&S,&schurstatus);CHKERRQ(ierr); 152 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);CHKERRQ(ierr); 153 switch (schurstatus) { 154 case MAT_FACTOR_SCHUR_FACTORED: 155 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);CHKERRQ(ierr); 156 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 157 ierr = MatMatSolveTranspose(S,B,X);CHKERRQ(ierr); 158 } else { 159 ierr = MatMatSolve(S,B,X);CHKERRQ(ierr); 160 } 161 break; 162 case MAT_FACTOR_SCHUR_INVERTED: 163 sizesol = mumps->id.nrhs*mumps->id.size_schur; 164 if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 165 ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr); 166 ierr = PetscMalloc1(sizesol,&mumps->schur_sol);CHKERRQ(ierr); 167 mumps->schur_sizesol = sizesol; 168 } 169 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);CHKERRQ(ierr); 170 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 171 ierr = MatTransposeMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr); 172 } else { 173 ierr = MatMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr); 174 } 175 ierr = MatCopy(X,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 176 break; 177 default: 178 SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %D",F->schur_status); 179 break; 180 } 181 ierr = MatFactorRestoreSchurComplement(F,&S,schurstatus);CHKERRQ(ierr); 182 ierr = MatDestroy(&B);CHKERRQ(ierr); 183 ierr = MatDestroy(&X);CHKERRQ(ierr); 184 PetscFunctionReturn(0); 185 } 186 187 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 188 { 189 Mat_MUMPS *mumps=(Mat_MUMPS*)F->data; 190 PetscErrorCode ierr; 191 192 PetscFunctionBegin; 193 if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 194 PetscFunctionReturn(0); 195 } 196 if (!expansion) { /* prepare for the condensation step */ 197 PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur; 198 /* allocate MUMPS internal array to store reduced right-hand sides */ 199 if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 200 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 201 mumps->id.lredrhs = mumps->id.size_schur; 202 ierr = PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);CHKERRQ(ierr); 203 mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs; 204 } 205 mumps->id.ICNTL(26) = 1; /* condensation phase */ 206 } else { /* prepare for the expansion step */ 207 /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */ 208 ierr = MatMumpsSolveSchur_Private(F);CHKERRQ(ierr); 209 mumps->id.ICNTL(26) = 2; /* expansion phase */ 210 PetscMUMPS_c(mumps); 211 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 212 /* restore defaults */ 213 mumps->id.ICNTL(26) = -1; 214 /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 215 if (mumps->id.nrhs > 1) { 216 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 217 mumps->id.lredrhs = 0; 218 mumps->sizeredrhs = 0; 219 } 220 } 221 PetscFunctionReturn(0); 222 } 223 224 /* 225 MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz] 226 227 input: 228 A - matrix in aij,baij or sbaij format 229 shift - 0: C style output triple; 1: Fortran style output triple. 230 reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 231 MAT_REUSE_MATRIX: only the values in v array are updated 232 output: 233 nnz - dim of r, c, and v (number of local nonzero entries of A) 234 r, c, v - row and col index, matrix values (matrix triples) 235 236 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 237 freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 238 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 239 240 */ 241 242 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 243 { 244 const PetscInt *ai,*aj,*ajj,M=A->rmap->n; 245 PetscInt nz,rnz,i,j; 246 PetscErrorCode ierr; 247 PetscInt *row,*col; 248 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 249 250 PetscFunctionBegin; 251 *v=aa->a; 252 if (reuse == MAT_INITIAL_MATRIX) { 253 nz = aa->nz; 254 ai = aa->i; 255 aj = aa->j; 256 *nnz = nz; 257 ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr); 258 col = row + nz; 259 260 nz = 0; 261 for (i=0; i<M; i++) { 262 rnz = ai[i+1] - ai[i]; 263 ajj = aj + ai[i]; 264 for (j=0; j<rnz; j++) { 265 row[nz] = i+shift; col[nz++] = ajj[j] + shift; 266 } 267 } 268 *r = row; *c = col; 269 } 270 PetscFunctionReturn(0); 271 } 272 273 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 274 { 275 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 276 PetscInt *ptr; 277 278 PetscFunctionBegin; 279 *v = a->val; 280 if (reuse == MAT_INITIAL_MATRIX) { 281 PetscInt nz,i,j,row; 282 PetscErrorCode ierr; 283 284 nz = a->sliidx[a->totalslices]; 285 *nnz = nz; 286 ierr = PetscMalloc1(2*nz, &ptr);CHKERRQ(ierr); 287 *r = ptr; 288 *c = ptr + nz; 289 290 for (i=0; i<a->totalslices; i++) { 291 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 292 *ptr++ = 8*i + row + shift; 293 } 294 } 295 for (i=0;i<nz;i++) *ptr++ = a->colidx[i] + shift; 296 } 297 PetscFunctionReturn(0); 298 } 299 300 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 301 { 302 Mat_SeqBAIJ *aa=(Mat_SeqBAIJ*)A->data; 303 const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2; 304 PetscInt bs,M,nz,idx=0,rnz,i,j,k,m; 305 PetscErrorCode ierr; 306 PetscInt *row,*col; 307 308 PetscFunctionBegin; 309 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 310 M = A->rmap->N/bs; 311 *v = aa->a; 312 if (reuse == MAT_INITIAL_MATRIX) { 313 ai = aa->i; aj = aa->j; 314 nz = bs2*aa->nz; 315 *nnz = nz; 316 ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr); 317 col = row + nz; 318 319 for (i=0; i<M; i++) { 320 ajj = aj + ai[i]; 321 rnz = ai[i+1] - ai[i]; 322 for (k=0; k<rnz; k++) { 323 for (j=0; j<bs; j++) { 324 for (m=0; m<bs; m++) { 325 row[idx] = i*bs + m + shift; 326 col[idx++] = bs*(ajj[k]) + j + shift; 327 } 328 } 329 } 330 } 331 *r = row; *c = col; 332 } 333 PetscFunctionReturn(0); 334 } 335 336 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 337 { 338 const PetscInt *ai, *aj,*ajj; 339 PetscInt nz,rnz,i,j,k,m,bs; 340 PetscErrorCode ierr; 341 PetscInt *row,*col; 342 PetscScalar *val; 343 Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)A->data; 344 const PetscInt bs2=aa->bs2,mbs=aa->mbs; 345 const PetscBool roworiented=aa->roworiented; 346 #if defined(PETSC_USE_COMPLEX) 347 PetscBool hermitian; 348 #endif 349 350 PetscFunctionBegin; 351 #if defined(PETSC_USE_COMPLEX) 352 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 353 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 354 #endif 355 ai = aa->i; 356 aj = aa->j; 357 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 358 if (reuse == MAT_INITIAL_MATRIX) { 359 nz = aa->nz; 360 ierr = PetscMalloc((2*bs2*nz*sizeof(PetscInt)+bs2*nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 361 col = row + bs2*nz; 362 val = (PetscScalar*)(col + bs2*nz); 363 364 *r = row; *c = col; *v = val; 365 } else { 366 row = *r; col = *c; val = *v; 367 } 368 369 nz = 0; 370 for (i=0; i<mbs; i++) { 371 rnz = ai[i+1] - ai[i]; 372 ajj = aj + ai[i]; 373 if (roworiented) { 374 for (j=0; j<rnz; j++) { 375 for (k=0; k<bs; k++) { 376 for (m=0; m<bs; m++) { 377 if (ajj[j]>i || m>=k) { 378 if (reuse == MAT_INITIAL_MATRIX) { 379 row[nz] = i*bs + k + shift; 380 col[nz] = ajj[j]*bs + m + shift; 381 } 382 val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs]; 383 } 384 } 385 } 386 } 387 } else { 388 for (j=0; j<rnz; j++) { 389 for (k=0; k<bs; k++) { 390 for (m=0; m<bs; m++) { 391 if (ajj[j]>i || m>=k) { 392 if (reuse == MAT_INITIAL_MATRIX) { 393 row[nz] = i*bs + m + shift; 394 col[nz] = ajj[j]*bs + k + shift; 395 } 396 val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs]; 397 } 398 } 399 } 400 } 401 } 402 } 403 *nnz = nz; 404 PetscFunctionReturn(0); 405 } 406 407 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 408 { 409 const PetscInt *ai,*aj,*ajj,*adiag,M=A->rmap->n; 410 PetscInt nz,rnz,i,j; 411 const PetscScalar *av,*v1; 412 PetscScalar *val; 413 PetscErrorCode ierr; 414 PetscInt *row,*col; 415 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 416 PetscBool missing; 417 #if defined(PETSC_USE_COMPLEX) 418 PetscBool hermitian; 419 #endif 420 421 PetscFunctionBegin; 422 #if defined(PETSC_USE_COMPLEX) 423 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 424 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 425 #endif 426 ai = aa->i; aj = aa->j; av = aa->a; 427 adiag = aa->diag; 428 ierr = MatMissingDiagonal_SeqAIJ(A,&missing,&i);CHKERRQ(ierr); 429 if (reuse == MAT_INITIAL_MATRIX) { 430 /* count nz in the upper triangular part of A */ 431 nz = 0; 432 if (missing) { 433 for (i=0; i<M; i++) { 434 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 435 for (j=ai[i];j<ai[i+1];j++) { 436 if (aj[j] < i) continue; 437 nz++; 438 } 439 } else { 440 nz += ai[i+1] - adiag[i]; 441 } 442 } 443 } else { 444 for (i=0; i<M; i++) nz += ai[i+1] - adiag[i]; 445 } 446 *nnz = nz; 447 448 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 449 col = row + nz; 450 val = (PetscScalar*)(col + nz); 451 452 nz = 0; 453 if (missing) { 454 for (i=0; i<M; i++) { 455 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 456 for (j=ai[i];j<ai[i+1];j++) { 457 if (aj[j] < i) continue; 458 row[nz] = i+shift; 459 col[nz] = aj[j]+shift; 460 val[nz] = av[j]; 461 nz++; 462 } 463 } else { 464 rnz = ai[i+1] - adiag[i]; 465 ajj = aj + adiag[i]; 466 v1 = av + adiag[i]; 467 for (j=0; j<rnz; j++) { 468 row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j]; 469 } 470 } 471 } 472 } else { 473 for (i=0; i<M; i++) { 474 rnz = ai[i+1] - adiag[i]; 475 ajj = aj + adiag[i]; 476 v1 = av + adiag[i]; 477 for (j=0; j<rnz; j++) { 478 row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j]; 479 } 480 } 481 } 482 *r = row; *c = col; *v = val; 483 } else { 484 nz = 0; val = *v; 485 if (missing) { 486 for (i=0; i <M; i++) { 487 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 488 for (j=ai[i];j<ai[i+1];j++) { 489 if (aj[j] < i) continue; 490 val[nz++] = av[j]; 491 } 492 } else { 493 rnz = ai[i+1] - adiag[i]; 494 v1 = av + adiag[i]; 495 for (j=0; j<rnz; j++) { 496 val[nz++] = v1[j]; 497 } 498 } 499 } 500 } else { 501 for (i=0; i <M; i++) { 502 rnz = ai[i+1] - adiag[i]; 503 v1 = av + adiag[i]; 504 for (j=0; j<rnz; j++) { 505 val[nz++] = v1[j]; 506 } 507 } 508 } 509 } 510 PetscFunctionReturn(0); 511 } 512 513 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 514 { 515 const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 516 PetscErrorCode ierr; 517 PetscInt rstart,nz,i,j,jj,irow,countA,countB; 518 PetscInt *row,*col; 519 const PetscScalar *av, *bv,*v1,*v2; 520 PetscScalar *val; 521 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data; 522 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)(mat->A)->data; 523 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 524 #if defined(PETSC_USE_COMPLEX) 525 PetscBool hermitian; 526 #endif 527 528 PetscFunctionBegin; 529 #if defined(PETSC_USE_COMPLEX) 530 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 531 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 532 #endif 533 rstart = A->rmap->rstart; 534 ai = aa->i; 535 aj = aa->j; 536 bi = bb->i; 537 bj = bb->j; 538 av = aa->a; 539 bv = bb->a; 540 541 garray = mat->garray; 542 543 if (reuse == MAT_INITIAL_MATRIX) { 544 nz = aa->nz + bb->nz; 545 *nnz = nz; 546 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 547 col = row + nz; 548 val = (PetscScalar*)(col + nz); 549 550 *r = row; *c = col; *v = val; 551 } else { 552 row = *r; col = *c; val = *v; 553 } 554 555 jj = 0; irow = rstart; 556 for (i=0; i<m; i++) { 557 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 558 countA = ai[i+1] - ai[i]; 559 countB = bi[i+1] - bi[i]; 560 bjj = bj + bi[i]; 561 v1 = av + ai[i]; 562 v2 = bv + bi[i]; 563 564 /* A-part */ 565 for (j=0; j<countA; j++) { 566 if (reuse == MAT_INITIAL_MATRIX) { 567 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 568 } 569 val[jj++] = v1[j]; 570 } 571 572 /* B-part */ 573 for (j=0; j < countB; j++) { 574 if (reuse == MAT_INITIAL_MATRIX) { 575 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 576 } 577 val[jj++] = v2[j]; 578 } 579 irow++; 580 } 581 PetscFunctionReturn(0); 582 } 583 584 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 585 { 586 const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 587 PetscErrorCode ierr; 588 PetscInt rstart,nz,i,j,jj,irow,countA,countB; 589 PetscInt *row,*col; 590 const PetscScalar *av, *bv,*v1,*v2; 591 PetscScalar *val; 592 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 593 Mat_SeqAIJ *aa = (Mat_SeqAIJ*)(mat->A)->data; 594 Mat_SeqAIJ *bb = (Mat_SeqAIJ*)(mat->B)->data; 595 596 PetscFunctionBegin; 597 rstart = A->rmap->rstart; 598 ai = aa->i; 599 aj = aa->j; 600 bi = bb->i; 601 bj = bb->j; 602 av = aa->a; 603 bv = bb->a; 604 605 garray = mat->garray; 606 607 if (reuse == MAT_INITIAL_MATRIX) { 608 nz = aa->nz + bb->nz; 609 *nnz = nz; 610 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 611 col = row + nz; 612 val = (PetscScalar*)(col + nz); 613 614 *r = row; *c = col; *v = val; 615 } else { 616 row = *r; col = *c; val = *v; 617 } 618 619 jj = 0; irow = rstart; 620 for (i=0; i<m; i++) { 621 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 622 countA = ai[i+1] - ai[i]; 623 countB = bi[i+1] - bi[i]; 624 bjj = bj + bi[i]; 625 v1 = av + ai[i]; 626 v2 = bv + bi[i]; 627 628 /* A-part */ 629 for (j=0; j<countA; j++) { 630 if (reuse == MAT_INITIAL_MATRIX) { 631 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 632 } 633 val[jj++] = v1[j]; 634 } 635 636 /* B-part */ 637 for (j=0; j < countB; j++) { 638 if (reuse == MAT_INITIAL_MATRIX) { 639 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 640 } 641 val[jj++] = v2[j]; 642 } 643 irow++; 644 } 645 PetscFunctionReturn(0); 646 } 647 648 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 649 { 650 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)A->data; 651 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ*)(mat->A)->data; 652 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 653 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj; 654 const PetscInt *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart; 655 const PetscInt bs2=mat->bs2; 656 PetscErrorCode ierr; 657 PetscInt bs,nz,i,j,k,n,jj,irow,countA,countB,idx; 658 PetscInt *row,*col; 659 const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2; 660 PetscScalar *val; 661 662 PetscFunctionBegin; 663 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 664 if (reuse == MAT_INITIAL_MATRIX) { 665 nz = bs2*(aa->nz + bb->nz); 666 *nnz = nz; 667 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 668 col = row + nz; 669 val = (PetscScalar*)(col + nz); 670 671 *r = row; *c = col; *v = val; 672 } else { 673 row = *r; col = *c; val = *v; 674 } 675 676 jj = 0; irow = rstart; 677 for (i=0; i<mbs; i++) { 678 countA = ai[i+1] - ai[i]; 679 countB = bi[i+1] - bi[i]; 680 ajj = aj + ai[i]; 681 bjj = bj + bi[i]; 682 v1 = av + bs2*ai[i]; 683 v2 = bv + bs2*bi[i]; 684 685 idx = 0; 686 /* A-part */ 687 for (k=0; k<countA; k++) { 688 for (j=0; j<bs; j++) { 689 for (n=0; n<bs; n++) { 690 if (reuse == MAT_INITIAL_MATRIX) { 691 row[jj] = irow + n + shift; 692 col[jj] = rstart + bs*ajj[k] + j + shift; 693 } 694 val[jj++] = v1[idx++]; 695 } 696 } 697 } 698 699 idx = 0; 700 /* B-part */ 701 for (k=0; k<countB; k++) { 702 for (j=0; j<bs; j++) { 703 for (n=0; n<bs; n++) { 704 if (reuse == MAT_INITIAL_MATRIX) { 705 row[jj] = irow + n + shift; 706 col[jj] = bs*garray[bjj[k]] + j + shift; 707 } 708 val[jj++] = v2[idx++]; 709 } 710 } 711 } 712 irow += bs; 713 } 714 PetscFunctionReturn(0); 715 } 716 717 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 718 { 719 const PetscInt *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 720 PetscErrorCode ierr; 721 PetscInt rstart,nz,nza,nzb,i,j,jj,irow,countA,countB; 722 PetscInt *row,*col; 723 const PetscScalar *av, *bv,*v1,*v2; 724 PetscScalar *val; 725 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 726 Mat_SeqAIJ *aa =(Mat_SeqAIJ*)(mat->A)->data; 727 Mat_SeqAIJ *bb =(Mat_SeqAIJ*)(mat->B)->data; 728 #if defined(PETSC_USE_COMPLEX) 729 PetscBool hermitian; 730 #endif 731 732 PetscFunctionBegin; 733 #if defined(PETSC_USE_COMPLEX) 734 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 735 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 736 #endif 737 ai = aa->i; 738 aj = aa->j; 739 adiag = aa->diag; 740 bi = bb->i; 741 bj = bb->j; 742 garray = mat->garray; 743 av = aa->a; 744 bv = bb->a; 745 746 rstart = A->rmap->rstart; 747 748 if (reuse == MAT_INITIAL_MATRIX) { 749 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 750 nzb = 0; /* num of upper triangular entries in mat->B */ 751 for (i=0; i<m; i++) { 752 nza += (ai[i+1] - adiag[i]); 753 countB = bi[i+1] - bi[i]; 754 bjj = bj + bi[i]; 755 for (j=0; j<countB; j++) { 756 if (garray[bjj[j]] > rstart) nzb++; 757 } 758 } 759 760 nz = nza + nzb; /* total nz of upper triangular part of mat */ 761 *nnz = nz; 762 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 763 col = row + nz; 764 val = (PetscScalar*)(col + nz); 765 766 *r = row; *c = col; *v = val; 767 } else { 768 row = *r; col = *c; val = *v; 769 } 770 771 jj = 0; irow = rstart; 772 for (i=0; i<m; i++) { 773 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 774 v1 = av + adiag[i]; 775 countA = ai[i+1] - adiag[i]; 776 countB = bi[i+1] - bi[i]; 777 bjj = bj + bi[i]; 778 v2 = bv + bi[i]; 779 780 /* A-part */ 781 for (j=0; j<countA; j++) { 782 if (reuse == MAT_INITIAL_MATRIX) { 783 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 784 } 785 val[jj++] = v1[j]; 786 } 787 788 /* B-part */ 789 for (j=0; j < countB; j++) { 790 if (garray[bjj[j]] > rstart) { 791 if (reuse == MAT_INITIAL_MATRIX) { 792 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 793 } 794 val[jj++] = v2[j]; 795 } 796 } 797 irow++; 798 } 799 PetscFunctionReturn(0); 800 } 801 802 PetscErrorCode MatDestroy_MUMPS(Mat A) 803 { 804 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 805 PetscErrorCode ierr; 806 807 PetscFunctionBegin; 808 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 809 ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr); 810 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 811 ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr); 812 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 813 ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr); 814 ierr = PetscFree(mumps->irn);CHKERRQ(ierr); 815 ierr = PetscFree(mumps->info);CHKERRQ(ierr); 816 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 817 mumps->id.job = JOB_END; 818 PetscMUMPS_c(mumps); 819 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 820 if (mumps->use_petsc_omp_support) { ierr = PetscOmpCtrlDestroy(&mumps->omp_ctrl);CHKERRQ(ierr); } 821 #endif 822 ierr = PetscFree2(mumps->recvcount,mumps->displs);CHKERRQ(ierr); 823 ierr = PetscFree(A->data);CHKERRQ(ierr); 824 825 /* clear composed functions */ 826 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr); 827 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr); 828 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr); 829 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr); 830 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);CHKERRQ(ierr); 831 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr); 832 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);CHKERRQ(ierr); 833 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);CHKERRQ(ierr); 834 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);CHKERRQ(ierr); 835 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);CHKERRQ(ierr); 836 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);CHKERRQ(ierr); 837 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);CHKERRQ(ierr); 838 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);CHKERRQ(ierr); 839 PetscFunctionReturn(0); 840 } 841 842 PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x) 843 { 844 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 845 PetscScalar *array; 846 Vec b_seq; 847 IS is_iden,is_petsc; 848 PetscErrorCode ierr; 849 PetscInt i; 850 PetscBool second_solve = PETSC_FALSE; 851 static PetscBool cite1 = PETSC_FALSE,cite2 = PETSC_FALSE; 852 853 PetscFunctionBegin; 854 ierr = PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1);CHKERRQ(ierr); 855 ierr = PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",&cite2);CHKERRQ(ierr); 856 857 if (A->factorerrortype) { 858 ierr = PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 859 ierr = VecSetInf(x);CHKERRQ(ierr); 860 PetscFunctionReturn(0); 861 } 862 863 mumps->id.ICNTL(20)= 0; /* dense RHS */ 864 mumps->id.nrhs = 1; 865 b_seq = mumps->b_seq; 866 if (mumps->petsc_size > 1) { 867 /* MUMPS only supports centralized rhs. Scatter b into a seqential rhs vector */ 868 ierr = VecScatterBegin(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 869 ierr = VecScatterEnd(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 870 if (!mumps->myid) {ierr = VecGetArray(b_seq,&array);CHKERRQ(ierr);} 871 } else { /* petsc_size == 1 */ 872 ierr = VecCopy(b,x);CHKERRQ(ierr); 873 ierr = VecGetArray(x,&array);CHKERRQ(ierr); 874 } 875 if (!mumps->myid) { /* define rhs on the host */ 876 mumps->id.nrhs = 1; 877 mumps->id.rhs = (MumpsScalar*)array; 878 } 879 880 /* 881 handle condensation step of Schur complement (if any) 882 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 883 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 884 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 885 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 886 */ 887 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 888 if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n"); 889 second_solve = PETSC_TRUE; 890 ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr); 891 } 892 /* solve phase */ 893 /*-------------*/ 894 mumps->id.job = JOB_SOLVE; 895 PetscMUMPS_c(mumps); 896 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 897 898 /* handle expansion step of Schur complement (if any) */ 899 if (second_solve) { 900 ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr); 901 } 902 903 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 904 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 905 /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 906 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 907 } 908 if (!mumps->scat_sol) { /* create scatter scat_sol */ 909 ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */ 910 for (i=0; i<mumps->id.lsol_loc; i++) { 911 mumps->id.isol_loc[i] -= 1; /* change Fortran style to C style */ 912 } 913 ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,mumps->id.isol_loc,PETSC_COPY_VALUES,&is_petsc);CHKERRQ(ierr); /* to */ 914 ierr = VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr); 915 ierr = ISDestroy(&is_iden);CHKERRQ(ierr); 916 ierr = ISDestroy(&is_petsc);CHKERRQ(ierr); 917 918 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 919 } 920 921 ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 922 ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 923 } 924 925 if (mumps->petsc_size > 1) {if (!mumps->myid) {ierr = VecRestoreArray(b_seq,&array);CHKERRQ(ierr);}} 926 else {ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);} 927 928 ierr = PetscLogFlops(2.0*mumps->id.RINFO(3));CHKERRQ(ierr); 929 PetscFunctionReturn(0); 930 } 931 932 PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x) 933 { 934 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 935 PetscErrorCode ierr; 936 937 PetscFunctionBegin; 938 mumps->id.ICNTL(9) = 0; 939 ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr); 940 mumps->id.ICNTL(9) = 1; 941 PetscFunctionReturn(0); 942 } 943 944 PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X) 945 { 946 PetscErrorCode ierr; 947 Mat Bt = NULL; 948 PetscBool flg, flgT; 949 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 950 PetscInt i,nrhs,M; 951 PetscScalar *array; 952 const PetscScalar *rbray; 953 PetscInt lsol_loc,nlsol_loc,*isol_loc,*idxx,*isol_loc_save,iidx = 0; 954 PetscScalar *bray,*sol_loc,*sol_loc_save; 955 IS is_to,is_from; 956 PetscInt k,proc,j,m,myrstart; 957 const PetscInt *rstart; 958 Vec v_mpi,b_seq,msol_loc; 959 VecScatter scat_rhs,scat_sol; 960 PetscScalar *aa; 961 PetscInt spnr,*ia,*ja; 962 Mat_MPIAIJ *b = NULL; 963 964 PetscFunctionBegin; 965 ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 966 if (!flg) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix"); 967 968 ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 969 if (flg) { /* dense B */ 970 if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution"); 971 mumps->id.ICNTL(20)= 0; /* dense RHS */ 972 } else { /* sparse B */ 973 ierr = PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);CHKERRQ(ierr); 974 if (flgT) { /* input B is transpose of actural RHS matrix, 975 because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */ 976 ierr = MatTransposeGetMat(B,&Bt);CHKERRQ(ierr); 977 } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix"); 978 mumps->id.ICNTL(20)= 1; /* sparse RHS */ 979 } 980 981 ierr = MatGetSize(B,&M,&nrhs);CHKERRQ(ierr); 982 mumps->id.nrhs = nrhs; 983 mumps->id.lrhs = M; 984 mumps->id.rhs = NULL; 985 986 if (mumps->petsc_size == 1) { 987 PetscScalar *aa; 988 PetscInt spnr,*ia,*ja; 989 PetscBool second_solve = PETSC_FALSE; 990 991 ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr); 992 mumps->id.rhs = (MumpsScalar*)array; 993 994 if (!Bt) { /* dense B */ 995 /* copy B to X */ 996 ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr); 997 ierr = PetscArraycpy(array,rbray,M*nrhs);CHKERRQ(ierr); 998 ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr); 999 } else { /* sparse B */ 1000 ierr = MatSeqAIJGetArray(Bt,&aa);CHKERRQ(ierr); 1001 ierr = MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1002 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 1003 /* mumps requires ia and ja start at 1! */ 1004 mumps->id.irhs_ptr = ia; 1005 mumps->id.irhs_sparse = ja; 1006 mumps->id.nz_rhs = ia[spnr] - 1; 1007 mumps->id.rhs_sparse = (MumpsScalar*)aa; 1008 } 1009 /* handle condensation step of Schur complement (if any) */ 1010 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1011 second_solve = PETSC_TRUE; 1012 ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr); 1013 } 1014 /* solve phase */ 1015 /*-------------*/ 1016 mumps->id.job = JOB_SOLVE; 1017 PetscMUMPS_c(mumps); 1018 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1019 1020 /* handle expansion step of Schur complement (if any) */ 1021 if (second_solve) { 1022 ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr); 1023 } 1024 if (Bt) { /* sparse B */ 1025 ierr = MatSeqAIJRestoreArray(Bt,&aa);CHKERRQ(ierr); 1026 ierr = MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1027 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 1028 } 1029 ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr); 1030 PetscFunctionReturn(0); 1031 } 1032 1033 /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/ 1034 if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n"); 1035 1036 /* create msol_loc to hold mumps local solution */ 1037 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1038 sol_loc_save = (PetscScalar*)mumps->id.sol_loc; 1039 1040 lsol_loc = mumps->id.lsol_loc; 1041 nlsol_loc = nrhs*lsol_loc; /* length of sol_loc */ 1042 ierr = PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);CHKERRQ(ierr); 1043 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1044 mumps->id.isol_loc = isol_loc; 1045 1046 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);CHKERRQ(ierr); 1047 1048 if (!Bt) { /* dense B */ 1049 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1050 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1051 0, re-arrange B into desired order, which is a local operation. 1052 */ 1053 1054 /* scatter v_mpi to b_seq because MUMPS only supports centralized rhs */ 1055 /* wrap dense rhs matrix B into a vector v_mpi */ 1056 ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr); 1057 ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr); 1058 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr); 1059 ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr); 1060 1061 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */ 1062 if (!mumps->myid) { 1063 PetscInt *idx; 1064 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1065 ierr = PetscMalloc1(nrhs*M,&idx);CHKERRQ(ierr); 1066 ierr = MatGetOwnershipRanges(B,&rstart);CHKERRQ(ierr); 1067 k = 0; 1068 for (proc=0; proc<mumps->petsc_size; proc++){ 1069 for (j=0; j<nrhs; j++){ 1070 for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i; 1071 } 1072 } 1073 1074 ierr = VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);CHKERRQ(ierr); 1075 ierr = ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr); 1076 ierr = ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);CHKERRQ(ierr); 1077 } else { 1078 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);CHKERRQ(ierr); 1079 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);CHKERRQ(ierr); 1080 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);CHKERRQ(ierr); 1081 } 1082 ierr = VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);CHKERRQ(ierr); 1083 ierr = VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1084 ierr = ISDestroy(&is_to);CHKERRQ(ierr); 1085 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1086 ierr = VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1087 1088 if (!mumps->myid) { /* define rhs on the host */ 1089 ierr = VecGetArray(b_seq,&bray);CHKERRQ(ierr); 1090 mumps->id.rhs = (MumpsScalar*)bray; 1091 ierr = VecRestoreArray(b_seq,&bray);CHKERRQ(ierr); 1092 } 1093 1094 } else { /* sparse B */ 1095 b = (Mat_MPIAIJ*)Bt->data; 1096 1097 /* wrap dense X into a vector v_mpi */ 1098 ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr); 1099 ierr = MatDenseGetArray(X,&bray);CHKERRQ(ierr); 1100 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr); 1101 ierr = MatDenseRestoreArray(X,&bray);CHKERRQ(ierr); 1102 1103 if (!mumps->myid) { 1104 ierr = MatSeqAIJGetArray(b->A,&aa);CHKERRQ(ierr); 1105 ierr = MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1106 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 1107 /* mumps requires ia and ja start at 1! */ 1108 mumps->id.irhs_ptr = ia; 1109 mumps->id.irhs_sparse = ja; 1110 mumps->id.nz_rhs = ia[spnr] - 1; 1111 mumps->id.rhs_sparse = (MumpsScalar*)aa; 1112 } else { 1113 mumps->id.irhs_ptr = NULL; 1114 mumps->id.irhs_sparse = NULL; 1115 mumps->id.nz_rhs = 0; 1116 mumps->id.rhs_sparse = NULL; 1117 } 1118 } 1119 1120 /* solve phase */ 1121 /*-------------*/ 1122 mumps->id.job = JOB_SOLVE; 1123 PetscMUMPS_c(mumps); 1124 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1125 1126 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1127 ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr); 1128 ierr = VecPlaceArray(v_mpi,array);CHKERRQ(ierr); 1129 1130 /* create scatter scat_sol */ 1131 ierr = MatGetOwnershipRanges(X,&rstart);CHKERRQ(ierr); 1132 /* iidx: index for scatter mumps solution to petsc X */ 1133 1134 ierr = ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);CHKERRQ(ierr); 1135 ierr = PetscMalloc1(nlsol_loc,&idxx);CHKERRQ(ierr); 1136 for (i=0; i<lsol_loc; i++) { 1137 isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1138 1139 for (proc=0; proc<mumps->petsc_size; proc++){ 1140 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) { 1141 myrstart = rstart[proc]; 1142 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1143 iidx = k + myrstart*nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1144 m = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */ 1145 break; 1146 } 1147 } 1148 1149 for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m; 1150 } 1151 ierr = ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr); 1152 ierr = VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);CHKERRQ(ierr); 1153 ierr = VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1154 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1155 ierr = ISDestroy(&is_to);CHKERRQ(ierr); 1156 ierr = VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1157 ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr); 1158 1159 /* free spaces */ 1160 mumps->id.sol_loc = (MumpsScalar*)sol_loc_save; 1161 mumps->id.isol_loc = isol_loc_save; 1162 1163 ierr = PetscFree2(sol_loc,isol_loc);CHKERRQ(ierr); 1164 ierr = PetscFree(idxx);CHKERRQ(ierr); 1165 ierr = VecDestroy(&msol_loc);CHKERRQ(ierr); 1166 ierr = VecDestroy(&v_mpi);CHKERRQ(ierr); 1167 if (Bt) { 1168 if (!mumps->myid) { 1169 b = (Mat_MPIAIJ*)Bt->data; 1170 ierr = MatSeqAIJRestoreArray(b->A,&aa);CHKERRQ(ierr); 1171 ierr = MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1172 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 1173 } 1174 } else { 1175 ierr = VecDestroy(&b_seq);CHKERRQ(ierr); 1176 ierr = VecScatterDestroy(&scat_rhs);CHKERRQ(ierr); 1177 } 1178 ierr = VecScatterDestroy(&scat_sol);CHKERRQ(ierr); 1179 ierr = PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));CHKERRQ(ierr); 1180 PetscFunctionReturn(0); 1181 } 1182 1183 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X) 1184 { 1185 PetscErrorCode ierr; 1186 PetscBool flg; 1187 Mat B; 1188 1189 PetscFunctionBegin; 1190 ierr = PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 1191 if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix"); 1192 1193 /* Create B=Bt^T that uses Bt's data structure */ 1194 ierr = MatCreateTranspose(Bt,&B);CHKERRQ(ierr); 1195 1196 ierr = MatMatSolve_MUMPS(A,B,X);CHKERRQ(ierr); 1197 ierr = MatDestroy(&B);CHKERRQ(ierr); 1198 PetscFunctionReturn(0); 1199 } 1200 1201 #if !defined(PETSC_USE_COMPLEX) 1202 /* 1203 input: 1204 F: numeric factor 1205 output: 1206 nneg: total number of negative pivots 1207 nzero: total number of zero pivots 1208 npos: (global dimension of F) - nneg - nzero 1209 */ 1210 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos) 1211 { 1212 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1213 PetscErrorCode ierr; 1214 PetscMPIInt size; 1215 1216 PetscFunctionBegin; 1217 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRQ(ierr); 1218 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1219 if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",mumps->id.INFOG(13)); 1220 1221 if (nneg) *nneg = mumps->id.INFOG(12); 1222 if (nzero || npos) { 1223 if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1224 if (nzero) *nzero = mumps->id.INFOG(28); 1225 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1226 } 1227 PetscFunctionReturn(0); 1228 } 1229 #endif 1230 1231 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps) 1232 { 1233 PetscErrorCode ierr; 1234 PetscInt i,nz=0,*irn,*jcn=0; 1235 PetscScalar *val=0; 1236 PetscMPIInt mpinz,*recvcount=NULL,*displs=NULL; 1237 1238 PetscFunctionBegin; 1239 if (mumps->omp_comm_size > 1) { 1240 if (reuse == MAT_INITIAL_MATRIX) { 1241 /* master first gathers counts of nonzeros to receive */ 1242 if (mumps->is_omp_master) { ierr = PetscMalloc2(mumps->omp_comm_size,&recvcount,mumps->omp_comm_size,&displs);CHKERRQ(ierr); } 1243 ierr = PetscMPIIntCast(mumps->nz,&mpinz);CHKERRQ(ierr); 1244 ierr = MPI_Gather(&mpinz,1,MPI_INT,recvcount,1,MPI_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1245 1246 /* master allocates memory to receive nonzeros */ 1247 if (mumps->is_omp_master) { 1248 displs[0] = 0; 1249 for (i=1; i<mumps->omp_comm_size; i++) displs[i] = displs[i-1] + recvcount[i-1]; 1250 nz = displs[mumps->omp_comm_size-1] + recvcount[mumps->omp_comm_size-1]; 1251 ierr = PetscMalloc(2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar),&irn);CHKERRQ(ierr); 1252 jcn = irn + nz; 1253 val = (PetscScalar*)(jcn + nz); 1254 } 1255 1256 /* save the gatherv plan */ 1257 mumps->mpinz = mpinz; /* used as send count */ 1258 mumps->recvcount = recvcount; 1259 mumps->displs = displs; 1260 1261 /* master gathers nonzeros */ 1262 ierr = MPI_Gatherv(mumps->irn,mpinz,MPIU_INT,irn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1263 ierr = MPI_Gatherv(mumps->jcn,mpinz,MPIU_INT,jcn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1264 ierr = MPI_Gatherv(mumps->val,mpinz,MPIU_SCALAR,val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1265 1266 /* master frees its row/col/val and replaces them with bigger arrays */ 1267 if (mumps->is_omp_master) { 1268 ierr = PetscFree(mumps->irn);CHKERRQ(ierr); /* irn/jcn/val are allocated together so free only irn */ 1269 mumps->nz = nz; /* it is a sum of mpinz over omp_comm */ 1270 mumps->irn = irn; 1271 mumps->jcn = jcn; 1272 mumps->val = val; 1273 } 1274 } else { 1275 ierr = MPI_Gatherv((mumps->is_omp_master?MPI_IN_PLACE:mumps->val),mumps->mpinz,MPIU_SCALAR,mumps->val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1276 } 1277 } 1278 PetscFunctionReturn(0); 1279 } 1280 1281 PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info) 1282 { 1283 Mat_MUMPS *mumps =(Mat_MUMPS*)(F)->data; 1284 PetscErrorCode ierr; 1285 PetscBool isMPIAIJ; 1286 1287 PetscFunctionBegin; 1288 if (mumps->id.INFOG(1) < 0) { 1289 if (mumps->id.INFOG(1) == -6) { 1290 ierr = PetscInfo2(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1291 } 1292 ierr = PetscInfo2(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1293 PetscFunctionReturn(0); 1294 } 1295 1296 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1297 ierr = MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);CHKERRQ(ierr); 1298 1299 /* numerical factorization phase */ 1300 /*-------------------------------*/ 1301 mumps->id.job = JOB_FACTNUMERIC; 1302 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1303 if (!mumps->myid) { 1304 mumps->id.a = (MumpsScalar*)mumps->val; 1305 } 1306 } else { 1307 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1308 } 1309 PetscMUMPS_c(mumps); 1310 if (mumps->id.INFOG(1) < 0) { 1311 if (A->erroriffailure) { 1312 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 1313 } else { 1314 if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */ 1315 ierr = PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1316 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1317 } else if (mumps->id.INFOG(1) == -13) { 1318 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1319 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1320 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10) ) { 1321 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray \n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1322 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1323 } else { 1324 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1325 F->factorerrortype = MAT_FACTOR_OTHER; 1326 } 1327 } 1328 } 1329 if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d\n",mumps->id.INFOG(16)); 1330 1331 F->assembled = PETSC_TRUE; 1332 mumps->matstruc = SAME_NONZERO_PATTERN; 1333 if (F->schur) { /* reset Schur status to unfactored */ 1334 #if defined(PETSC_HAVE_CUDA) 1335 F->schur->valid_GPU_matrix = PETSC_OFFLOAD_CPU; 1336 #endif 1337 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1338 mumps->id.ICNTL(19) = 2; 1339 ierr = MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);CHKERRQ(ierr); 1340 } 1341 ierr = MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);CHKERRQ(ierr); 1342 } 1343 1344 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 1345 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 1346 1347 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 1348 if (mumps->petsc_size > 1) { 1349 PetscInt lsol_loc; 1350 PetscScalar *sol_loc; 1351 1352 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr); 1353 1354 /* distributed solution; Create x_seq=sol_loc for repeated use */ 1355 if (mumps->x_seq) { 1356 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 1357 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 1358 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 1359 } 1360 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1361 ierr = PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);CHKERRQ(ierr); 1362 mumps->id.lsol_loc = lsol_loc; 1363 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1364 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr); 1365 } 1366 ierr = PetscLogFlops(mumps->id.RINFO(2));CHKERRQ(ierr); 1367 PetscFunctionReturn(0); 1368 } 1369 1370 /* Sets MUMPS options from the options database */ 1371 PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A) 1372 { 1373 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1374 PetscErrorCode ierr; 1375 PetscInt icntl,info[80],i,ninfo=80; 1376 PetscBool flg; 1377 1378 PetscFunctionBegin; 1379 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 1380 ierr = PetscOptionsInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr); 1381 if (flg) mumps->id.ICNTL(1) = icntl; 1382 ierr = PetscOptionsInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr); 1383 if (flg) mumps->id.ICNTL(2) = icntl; 1384 ierr = PetscOptionsInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr); 1385 if (flg) mumps->id.ICNTL(3) = icntl; 1386 1387 ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr); 1388 if (flg) mumps->id.ICNTL(4) = icntl; 1389 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 1390 1391 ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr); 1392 if (flg) mumps->id.ICNTL(6) = icntl; 1393 1394 ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr); 1395 if (flg) { 1396 if (icntl== 1 && mumps->petsc_size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n"); 1397 else mumps->id.ICNTL(7) = icntl; 1398 } 1399 1400 ierr = PetscOptionsInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr); 1401 /* ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL);CHKERRQ(ierr); handled by MatSolveTranspose_MUMPS() */ 1402 ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr); 1403 ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr); 1404 ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr); 1405 ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr); 1406 ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr); 1407 ierr = PetscOptionsInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr); 1408 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 1409 ierr = MatDestroy(&F->schur);CHKERRQ(ierr); 1410 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 1411 } 1412 /* ierr = PetscOptionsInt("-mat_mumps_icntl_20","ICNTL(20): the format (dense or sparse) of the right-hand sides","None",mumps->id.ICNTL(20),&mumps->id.ICNTL(20),NULL);CHKERRQ(ierr); -- sparse rhs is not supported in PETSc API */ 1413 /* ierr = PetscOptionsInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL);CHKERRQ(ierr); we only use distributed solution vector */ 1414 1415 ierr = PetscOptionsInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr); 1416 ierr = PetscOptionsInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr); 1417 ierr = PetscOptionsInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr); 1418 if (mumps->id.ICNTL(24)) { 1419 mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ 1420 } 1421 1422 ierr = PetscOptionsInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr); 1423 ierr = PetscOptionsInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr); 1424 ierr = PetscOptionsInt("-mat_mumps_icntl_27","ICNTL(27): the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr); 1425 ierr = PetscOptionsInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr); 1426 ierr = PetscOptionsInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr); 1427 /* ierr = PetscOptionsInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */ 1428 ierr = PetscOptionsInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr); 1429 /* ierr = PetscOptionsInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr); -- not supported by PETSc API */ 1430 ierr = PetscOptionsInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr); 1431 ierr = PetscOptionsInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr); 1432 ierr = PetscOptionsInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);CHKERRQ(ierr); 1433 ierr = PetscOptionsInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);CHKERRQ(ierr); 1434 1435 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr); 1436 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr); 1437 ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr); 1438 ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr); 1439 ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr); 1440 ierr = PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);CHKERRQ(ierr); 1441 1442 ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, 256, NULL);CHKERRQ(ierr); 1443 1444 ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr); 1445 if (ninfo) { 1446 if (ninfo > 80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %d must <= 80\n",ninfo); 1447 ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr); 1448 mumps->ninfo = ninfo; 1449 for (i=0; i<ninfo; i++) { 1450 if (info[i] < 0 || info[i]>80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %d must between 1 and 80\n",ninfo); 1451 else mumps->info[i] = info[i]; 1452 } 1453 } 1454 1455 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1456 PetscFunctionReturn(0); 1457 } 1458 1459 PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps) 1460 { 1461 PetscErrorCode ierr; 1462 PetscInt nthreads=0; 1463 1464 PetscFunctionBegin; 1465 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1466 ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRQ(ierr); 1467 ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRQ(ierr); /* so that code like "if (!myid)" still works even if mumps_comm is different */ 1468 1469 ierr = PetscOptionsHasName(NULL,NULL,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr); 1470 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1471 ierr = PetscOptionsGetInt(NULL,NULL,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr); 1472 if (mumps->use_petsc_omp_support) { 1473 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1474 ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr); 1475 ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr); 1476 #else 1477 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -mat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual\n"); 1478 #endif 1479 } else { 1480 mumps->omp_comm = PETSC_COMM_SELF; 1481 mumps->mumps_comm = mumps->petsc_comm; 1482 mumps->is_omp_master = PETSC_TRUE; 1483 } 1484 ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRQ(ierr); 1485 1486 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 1487 mumps->id.job = JOB_INIT; 1488 mumps->id.par = 1; /* host participates factorizaton and solve */ 1489 mumps->id.sym = mumps->sym; 1490 1491 PetscMUMPS_c(mumps); 1492 1493 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 1494 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 1495 */ 1496 ierr = MPI_Bcast(mumps->id.icntl,60,MPIU_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 Manual Section 9 */ 1497 ierr = MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); 1498 1499 mumps->scat_rhs = NULL; 1500 mumps->scat_sol = NULL; 1501 1502 /* set PETSc-MUMPS default options - override MUMPS default */ 1503 mumps->id.ICNTL(3) = 0; 1504 mumps->id.ICNTL(4) = 0; 1505 if (mumps->petsc_size == 1) { 1506 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 1507 } else { 1508 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 1509 mumps->id.ICNTL(20) = 0; /* rhs is in dense format */ 1510 mumps->id.ICNTL(21) = 1; /* distributed solution */ 1511 } 1512 1513 /* schur */ 1514 mumps->id.size_schur = 0; 1515 mumps->id.listvar_schur = NULL; 1516 mumps->id.schur = NULL; 1517 mumps->sizeredrhs = 0; 1518 mumps->schur_sol = NULL; 1519 mumps->schur_sizesol = 0; 1520 PetscFunctionReturn(0); 1521 } 1522 1523 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps) 1524 { 1525 PetscErrorCode ierr; 1526 1527 PetscFunctionBegin; 1528 ierr = MPI_Bcast(mumps->id.infog, 80,MPIU_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 manual p82 */ 1529 ierr = MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); 1530 if (mumps->id.INFOG(1) < 0) { 1531 if (A->erroriffailure) { 1532 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1533 } else { 1534 if (mumps->id.INFOG(1) == -6) { 1535 ierr = PetscInfo2(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1536 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 1537 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 1538 ierr = PetscInfo2(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1539 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1540 } else { 1541 ierr = PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1542 F->factorerrortype = MAT_FACTOR_OTHER; 1543 } 1544 } 1545 } 1546 PetscFunctionReturn(0); 1547 } 1548 1549 /* Note Petsc r(=c) permutation is used when mumps->id.ICNTL(7)==1 with centralized assembled matrix input; otherwise r and c are ignored */ 1550 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1551 { 1552 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1553 PetscErrorCode ierr; 1554 Vec b; 1555 const PetscInt M = A->rmap->N; 1556 1557 PetscFunctionBegin; 1558 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1559 1560 /* Set MUMPS options from the options database */ 1561 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1562 1563 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1564 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1565 1566 /* analysis phase */ 1567 /*----------------*/ 1568 mumps->id.job = JOB_FACTSYMBOLIC; 1569 mumps->id.n = M; 1570 switch (mumps->id.ICNTL(18)) { 1571 case 0: /* centralized assembled matrix input */ 1572 if (!mumps->myid) { 1573 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1574 if (mumps->id.ICNTL(6)>1) { 1575 mumps->id.a = (MumpsScalar*)mumps->val; 1576 } 1577 if (mumps->id.ICNTL(7) == 1) { /* use user-provide matrix ordering - assuming r = c ordering */ 1578 /* 1579 PetscBool flag; 1580 ierr = ISEqual(r,c,&flag);CHKERRQ(ierr); 1581 if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"row_perm != col_perm"); 1582 ierr = ISView(r,PETSC_VIEWER_STDOUT_SELF); 1583 */ 1584 if (!mumps->myid) { 1585 const PetscInt *idx; 1586 PetscInt i,*perm_in; 1587 1588 ierr = PetscMalloc1(M,&perm_in);CHKERRQ(ierr); 1589 ierr = ISGetIndices(r,&idx);CHKERRQ(ierr); 1590 1591 mumps->id.perm_in = perm_in; 1592 for (i=0; i<M; i++) perm_in[i] = idx[i]+1; /* perm_in[]: start from 1, not 0! */ 1593 ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr); 1594 } 1595 } 1596 } 1597 break; 1598 case 3: /* distributed assembled matrix input (size>1) */ 1599 mumps->id.nz_loc = mumps->nz; 1600 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1601 if (mumps->id.ICNTL(6)>1) { 1602 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1603 } 1604 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1605 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1606 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1607 ierr = VecDestroy(&b);CHKERRQ(ierr); 1608 break; 1609 } 1610 PetscMUMPS_c(mumps); 1611 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1612 1613 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1614 F->ops->solve = MatSolve_MUMPS; 1615 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1616 F->ops->matsolve = MatMatSolve_MUMPS; 1617 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1618 PetscFunctionReturn(0); 1619 } 1620 1621 /* Note the Petsc r and c permutations are ignored */ 1622 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1623 { 1624 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1625 PetscErrorCode ierr; 1626 Vec b; 1627 const PetscInt M = A->rmap->N; 1628 1629 PetscFunctionBegin; 1630 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1631 1632 /* Set MUMPS options from the options database */ 1633 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1634 1635 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1636 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1637 1638 /* analysis phase */ 1639 /*----------------*/ 1640 mumps->id.job = JOB_FACTSYMBOLIC; 1641 mumps->id.n = M; 1642 switch (mumps->id.ICNTL(18)) { 1643 case 0: /* centralized assembled matrix input */ 1644 if (!mumps->myid) { 1645 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1646 if (mumps->id.ICNTL(6)>1) { 1647 mumps->id.a = (MumpsScalar*)mumps->val; 1648 } 1649 } 1650 break; 1651 case 3: /* distributed assembled matrix input (size>1) */ 1652 mumps->id.nz_loc = mumps->nz; 1653 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1654 if (mumps->id.ICNTL(6)>1) { 1655 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1656 } 1657 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1658 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1659 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1660 ierr = VecDestroy(&b);CHKERRQ(ierr); 1661 break; 1662 } 1663 PetscMUMPS_c(mumps); 1664 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1665 1666 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1667 F->ops->solve = MatSolve_MUMPS; 1668 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1669 PetscFunctionReturn(0); 1670 } 1671 1672 /* Note the Petsc r permutation and factor info are ignored */ 1673 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info) 1674 { 1675 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1676 PetscErrorCode ierr; 1677 Vec b; 1678 const PetscInt M = A->rmap->N; 1679 1680 PetscFunctionBegin; 1681 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1682 1683 /* Set MUMPS options from the options database */ 1684 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1685 1686 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1687 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1688 1689 /* analysis phase */ 1690 /*----------------*/ 1691 mumps->id.job = JOB_FACTSYMBOLIC; 1692 mumps->id.n = M; 1693 switch (mumps->id.ICNTL(18)) { 1694 case 0: /* centralized assembled matrix input */ 1695 if (!mumps->myid) { 1696 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1697 if (mumps->id.ICNTL(6)>1) { 1698 mumps->id.a = (MumpsScalar*)mumps->val; 1699 } 1700 } 1701 break; 1702 case 3: /* distributed assembled matrix input (size>1) */ 1703 mumps->id.nz_loc = mumps->nz; 1704 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1705 if (mumps->id.ICNTL(6)>1) { 1706 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1707 } 1708 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1709 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1710 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1711 ierr = VecDestroy(&b);CHKERRQ(ierr); 1712 break; 1713 } 1714 PetscMUMPS_c(mumps); 1715 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1716 1717 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 1718 F->ops->solve = MatSolve_MUMPS; 1719 F->ops->solvetranspose = MatSolve_MUMPS; 1720 F->ops->matsolve = MatMatSolve_MUMPS; 1721 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1722 #if defined(PETSC_USE_COMPLEX) 1723 F->ops->getinertia = NULL; 1724 #else 1725 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 1726 #endif 1727 PetscFunctionReturn(0); 1728 } 1729 1730 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) 1731 { 1732 PetscErrorCode ierr; 1733 PetscBool iascii; 1734 PetscViewerFormat format; 1735 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1736 1737 PetscFunctionBegin; 1738 /* check if matrix is mumps type */ 1739 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0); 1740 1741 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1742 if (iascii) { 1743 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1744 if (format == PETSC_VIEWER_ASCII_INFO) { 1745 ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr); 1746 ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",mumps->id.sym);CHKERRQ(ierr); 1747 ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",mumps->id.par);CHKERRQ(ierr); 1748 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d \n",mumps->id.ICNTL(1));CHKERRQ(ierr); 1749 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d \n",mumps->id.ICNTL(2));CHKERRQ(ierr); 1750 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d \n",mumps->id.ICNTL(3));CHKERRQ(ierr); 1751 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",mumps->id.ICNTL(4));CHKERRQ(ierr); 1752 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",mumps->id.ICNTL(5));CHKERRQ(ierr); 1753 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",mumps->id.ICNTL(6));CHKERRQ(ierr); 1754 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d \n",mumps->id.ICNTL(7));CHKERRQ(ierr); 1755 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d \n",mumps->id.ICNTL(8));CHKERRQ(ierr); 1756 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",mumps->id.ICNTL(10));CHKERRQ(ierr); 1757 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",mumps->id.ICNTL(11));CHKERRQ(ierr); 1758 if (mumps->id.ICNTL(11)>0) { 1759 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr); 1760 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr); 1761 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6));CHKERRQ(ierr); 1762 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr); 1763 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr); 1764 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr); 1765 } 1766 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",mumps->id.ICNTL(12));CHKERRQ(ierr); 1767 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (efficiency control): %d \n",mumps->id.ICNTL(13));CHKERRQ(ierr); 1768 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",mumps->id.ICNTL(14));CHKERRQ(ierr); 1769 /* ICNTL(15-17) not used */ 1770 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",mumps->id.ICNTL(18));CHKERRQ(ierr); 1771 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d \n",mumps->id.ICNTL(19));CHKERRQ(ierr); 1772 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(20) (rhs sparse pattern): %d \n",mumps->id.ICNTL(20));CHKERRQ(ierr); 1773 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d \n",mumps->id.ICNTL(21));CHKERRQ(ierr); 1774 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d \n",mumps->id.ICNTL(22));CHKERRQ(ierr); 1775 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory can be allocated locally):%d \n",mumps->id.ICNTL(23));CHKERRQ(ierr); 1776 1777 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d \n",mumps->id.ICNTL(24));CHKERRQ(ierr); 1778 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d \n",mumps->id.ICNTL(25));CHKERRQ(ierr); 1779 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for rhs or solution): %d \n",mumps->id.ICNTL(26));CHKERRQ(ierr); 1780 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(27) (experimental parameter): %d \n",mumps->id.ICNTL(27));CHKERRQ(ierr); 1781 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d \n",mumps->id.ICNTL(28));CHKERRQ(ierr); 1782 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d \n",mumps->id.ICNTL(29));CHKERRQ(ierr); 1783 1784 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d \n",mumps->id.ICNTL(30));CHKERRQ(ierr); 1785 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(31) (factors is discarded in the solve phase): %d \n",mumps->id.ICNTL(31));CHKERRQ(ierr); 1786 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d \n",mumps->id.ICNTL(33));CHKERRQ(ierr); 1787 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d \n",mumps->id.ICNTL(35));CHKERRQ(ierr); 1788 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(36) (choice of BLR factorization variant): %d \n",mumps->id.ICNTL(36));CHKERRQ(ierr); 1789 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(38) (estimated compression rate of LU factors): %d \n",mumps->id.ICNTL(38));CHKERRQ(ierr); 1790 1791 ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1));CHKERRQ(ierr); 1792 ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr); 1793 ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3));CHKERRQ(ierr); 1794 ierr = PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4));CHKERRQ(ierr); 1795 ierr = PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5));CHKERRQ(ierr); 1796 ierr = PetscViewerASCIIPrintf(viewer," CNTL(7) (dropping parameter for BLR): %g \n",mumps->id.CNTL(7));CHKERRQ(ierr); 1797 1798 /* infomation local to each processor */ 1799 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr); 1800 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1801 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr); 1802 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1803 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr); 1804 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr); 1805 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1806 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr); 1807 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr); 1808 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1809 1810 ierr = PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr); 1811 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr); 1812 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1813 1814 ierr = PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr); 1815 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr); 1816 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1817 1818 ierr = PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr); 1819 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr); 1820 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1821 1822 if (mumps->ninfo && mumps->ninfo <= 80){ 1823 PetscInt i; 1824 for (i=0; i<mumps->ninfo; i++){ 1825 ierr = PetscViewerASCIIPrintf(viewer, " INFO(%d): \n",mumps->info[i]);CHKERRQ(ierr); 1826 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(mumps->info[i]));CHKERRQ(ierr); 1827 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1828 } 1829 } 1830 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1831 1832 if (!mumps->myid) { /* information from the host */ 1833 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr); 1834 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr); 1835 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr); 1836 ierr = PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr); 1837 1838 ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(3));CHKERRQ(ierr); 1839 ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(4));CHKERRQ(ierr); 1840 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",mumps->id.INFOG(5));CHKERRQ(ierr); 1841 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",mumps->id.INFOG(6));CHKERRQ(ierr); 1842 ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively use after analysis): %d \n",mumps->id.INFOG(7));CHKERRQ(ierr); 1843 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",mumps->id.INFOG(8));CHKERRQ(ierr); 1844 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",mumps->id.INFOG(9));CHKERRQ(ierr); 1845 ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d \n",mumps->id.INFOG(10));CHKERRQ(ierr); 1846 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d \n",mumps->id.INFOG(11));CHKERRQ(ierr); 1847 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",mumps->id.INFOG(12));CHKERRQ(ierr); 1848 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",mumps->id.INFOG(13));CHKERRQ(ierr); 1849 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",mumps->id.INFOG(14));CHKERRQ(ierr); 1850 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",mumps->id.INFOG(15));CHKERRQ(ierr); 1851 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",mumps->id.INFOG(16));CHKERRQ(ierr); 1852 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",mumps->id.INFOG(17));CHKERRQ(ierr); 1853 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",mumps->id.INFOG(18));CHKERRQ(ierr); 1854 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",mumps->id.INFOG(19));CHKERRQ(ierr); 1855 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",mumps->id.INFOG(20));CHKERRQ(ierr); 1856 ierr = PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",mumps->id.INFOG(21));CHKERRQ(ierr); 1857 ierr = PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",mumps->id.INFOG(22));CHKERRQ(ierr); 1858 ierr = PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",mumps->id.INFOG(23));CHKERRQ(ierr); 1859 ierr = PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",mumps->id.INFOG(24));CHKERRQ(ierr); 1860 ierr = PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",mumps->id.INFOG(25));CHKERRQ(ierr); 1861 ierr = PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));CHKERRQ(ierr); 1862 ierr = PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));CHKERRQ(ierr); 1863 ierr = PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));CHKERRQ(ierr); 1864 ierr = PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));CHKERRQ(ierr); 1865 ierr = PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));CHKERRQ(ierr); 1866 ierr = PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));CHKERRQ(ierr); 1867 ierr = PetscViewerASCIIPrintf(viewer," INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));CHKERRQ(ierr); 1868 ierr = PetscViewerASCIIPrintf(viewer," INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(36));CHKERRQ(ierr); 1869 ierr = PetscViewerASCIIPrintf(viewer," INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d \n",mumps->id.INFOG(37));CHKERRQ(ierr); 1870 ierr = PetscViewerASCIIPrintf(viewer," INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(38));CHKERRQ(ierr); 1871 ierr = PetscViewerASCIIPrintf(viewer," INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d \n",mumps->id.INFOG(39));CHKERRQ(ierr); 1872 } 1873 } 1874 } 1875 PetscFunctionReturn(0); 1876 } 1877 1878 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info) 1879 { 1880 Mat_MUMPS *mumps =(Mat_MUMPS*)A->data; 1881 1882 PetscFunctionBegin; 1883 info->block_size = 1.0; 1884 info->nz_allocated = mumps->id.INFOG(20); 1885 info->nz_used = mumps->id.INFOG(20); 1886 info->nz_unneeded = 0.0; 1887 info->assemblies = 0.0; 1888 info->mallocs = 0.0; 1889 info->memory = 0.0; 1890 info->fill_ratio_given = 0; 1891 info->fill_ratio_needed = 0; 1892 info->factor_mallocs = 0; 1893 PetscFunctionReturn(0); 1894 } 1895 1896 /* -------------------------------------------------------------------------------------------*/ 1897 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 1898 { 1899 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1900 const PetscInt *idxs; 1901 PetscInt size,i; 1902 PetscErrorCode ierr; 1903 1904 PetscFunctionBegin; 1905 ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr); 1906 if (mumps->petsc_size > 1) { 1907 PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */ 1908 1909 ls = mumps->myid ? (size ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 1910 ierr = MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);CHKERRQ(ierr); 1911 if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc\n"); 1912 } 1913 if (mumps->id.size_schur != size) { 1914 ierr = PetscFree2(mumps->id.listvar_schur,mumps->id.schur);CHKERRQ(ierr); 1915 mumps->id.size_schur = size; 1916 mumps->id.schur_lld = size; 1917 ierr = PetscMalloc2(size,&mumps->id.listvar_schur,size*size,&mumps->id.schur);CHKERRQ(ierr); 1918 } 1919 1920 /* Schur complement matrix */ 1921 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.size_schur,(PetscScalar*)mumps->id.schur,&F->schur);CHKERRQ(ierr); 1922 if (mumps->sym == 1) { 1923 ierr = MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 1924 } 1925 1926 /* MUMPS expects Fortran style indices */ 1927 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 1928 ierr = PetscArraycpy(mumps->id.listvar_schur,idxs,size);CHKERRQ(ierr); 1929 for (i=0;i<size;i++) mumps->id.listvar_schur[i]++; 1930 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 1931 if (mumps->petsc_size > 1) { 1932 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 1933 } else { 1934 if (F->factortype == MAT_FACTOR_LU) { 1935 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 1936 } else { 1937 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 1938 } 1939 } 1940 /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 1941 mumps->id.ICNTL(26) = -1; 1942 PetscFunctionReturn(0); 1943 } 1944 1945 /* -------------------------------------------------------------------------------------------*/ 1946 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S) 1947 { 1948 Mat St; 1949 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1950 PetscScalar *array; 1951 #if defined(PETSC_USE_COMPLEX) 1952 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 1953 #endif 1954 PetscErrorCode ierr; 1955 1956 PetscFunctionBegin; 1957 if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 1958 ierr = MatCreate(PETSC_COMM_SELF,&St);CHKERRQ(ierr); 1959 ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);CHKERRQ(ierr); 1960 ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr); 1961 ierr = MatSetUp(St);CHKERRQ(ierr); 1962 ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr); 1963 if (!mumps->sym) { /* MUMPS always return a full matrix */ 1964 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1965 PetscInt i,j,N=mumps->id.size_schur; 1966 for (i=0;i<N;i++) { 1967 for (j=0;j<N;j++) { 1968 #if !defined(PETSC_USE_COMPLEX) 1969 PetscScalar val = mumps->id.schur[i*N+j]; 1970 #else 1971 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 1972 #endif 1973 array[j*N+i] = val; 1974 } 1975 } 1976 } else { /* stored by columns */ 1977 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 1978 } 1979 } else { /* either full or lower-triangular (not packed) */ 1980 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 1981 PetscInt i,j,N=mumps->id.size_schur; 1982 for (i=0;i<N;i++) { 1983 for (j=i;j<N;j++) { 1984 #if !defined(PETSC_USE_COMPLEX) 1985 PetscScalar val = mumps->id.schur[i*N+j]; 1986 #else 1987 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 1988 #endif 1989 array[i*N+j] = val; 1990 array[j*N+i] = val; 1991 } 1992 } 1993 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 1994 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 1995 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 1996 PetscInt i,j,N=mumps->id.size_schur; 1997 for (i=0;i<N;i++) { 1998 for (j=0;j<i+1;j++) { 1999 #if !defined(PETSC_USE_COMPLEX) 2000 PetscScalar val = mumps->id.schur[i*N+j]; 2001 #else 2002 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2003 #endif 2004 array[i*N+j] = val; 2005 array[j*N+i] = val; 2006 } 2007 } 2008 } 2009 } 2010 ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr); 2011 *S = St; 2012 PetscFunctionReturn(0); 2013 } 2014 2015 /* -------------------------------------------------------------------------------------------*/ 2016 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival) 2017 { 2018 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2019 2020 PetscFunctionBegin; 2021 mumps->id.ICNTL(icntl) = ival; 2022 PetscFunctionReturn(0); 2023 } 2024 2025 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival) 2026 { 2027 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2028 2029 PetscFunctionBegin; 2030 *ival = mumps->id.ICNTL(icntl); 2031 PetscFunctionReturn(0); 2032 } 2033 2034 /*@ 2035 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2036 2037 Logically Collective on Mat 2038 2039 Input Parameters: 2040 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2041 . icntl - index of MUMPS parameter array ICNTL() 2042 - ival - value of MUMPS ICNTL(icntl) 2043 2044 Options Database: 2045 . -mat_mumps_icntl_<icntl> <ival> 2046 2047 Level: beginner 2048 2049 References: 2050 . MUMPS Users' Guide 2051 2052 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2053 @*/ 2054 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival) 2055 { 2056 PetscErrorCode ierr; 2057 2058 PetscFunctionBegin; 2059 PetscValidType(F,1); 2060 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2061 PetscValidLogicalCollectiveInt(F,icntl,2); 2062 PetscValidLogicalCollectiveInt(F,ival,3); 2063 ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr); 2064 PetscFunctionReturn(0); 2065 } 2066 2067 /*@ 2068 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2069 2070 Logically Collective on Mat 2071 2072 Input Parameters: 2073 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2074 - icntl - index of MUMPS parameter array ICNTL() 2075 2076 Output Parameter: 2077 . ival - value of MUMPS ICNTL(icntl) 2078 2079 Level: beginner 2080 2081 References: 2082 . MUMPS Users' Guide 2083 2084 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2085 @*/ 2086 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival) 2087 { 2088 PetscErrorCode ierr; 2089 2090 PetscFunctionBegin; 2091 PetscValidType(F,1); 2092 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2093 PetscValidLogicalCollectiveInt(F,icntl,2); 2094 PetscValidIntPointer(ival,3); 2095 ierr = PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2096 PetscFunctionReturn(0); 2097 } 2098 2099 /* -------------------------------------------------------------------------------------------*/ 2100 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val) 2101 { 2102 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2103 2104 PetscFunctionBegin; 2105 mumps->id.CNTL(icntl) = val; 2106 PetscFunctionReturn(0); 2107 } 2108 2109 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val) 2110 { 2111 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2112 2113 PetscFunctionBegin; 2114 *val = mumps->id.CNTL(icntl); 2115 PetscFunctionReturn(0); 2116 } 2117 2118 /*@ 2119 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2120 2121 Logically Collective on Mat 2122 2123 Input Parameters: 2124 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2125 . icntl - index of MUMPS parameter array CNTL() 2126 - val - value of MUMPS CNTL(icntl) 2127 2128 Options Database: 2129 . -mat_mumps_cntl_<icntl> <val> 2130 2131 Level: beginner 2132 2133 References: 2134 . MUMPS Users' Guide 2135 2136 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2137 @*/ 2138 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val) 2139 { 2140 PetscErrorCode ierr; 2141 2142 PetscFunctionBegin; 2143 PetscValidType(F,1); 2144 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2145 PetscValidLogicalCollectiveInt(F,icntl,2); 2146 PetscValidLogicalCollectiveReal(F,val,3); 2147 ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr); 2148 PetscFunctionReturn(0); 2149 } 2150 2151 /*@ 2152 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2153 2154 Logically Collective on Mat 2155 2156 Input Parameters: 2157 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2158 - icntl - index of MUMPS parameter array CNTL() 2159 2160 Output Parameter: 2161 . val - value of MUMPS CNTL(icntl) 2162 2163 Level: beginner 2164 2165 References: 2166 . MUMPS Users' Guide 2167 2168 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2169 @*/ 2170 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val) 2171 { 2172 PetscErrorCode ierr; 2173 2174 PetscFunctionBegin; 2175 PetscValidType(F,1); 2176 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2177 PetscValidLogicalCollectiveInt(F,icntl,2); 2178 PetscValidRealPointer(val,3); 2179 ierr = PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2180 PetscFunctionReturn(0); 2181 } 2182 2183 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info) 2184 { 2185 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2186 2187 PetscFunctionBegin; 2188 *info = mumps->id.INFO(icntl); 2189 PetscFunctionReturn(0); 2190 } 2191 2192 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog) 2193 { 2194 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2195 2196 PetscFunctionBegin; 2197 *infog = mumps->id.INFOG(icntl); 2198 PetscFunctionReturn(0); 2199 } 2200 2201 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo) 2202 { 2203 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2204 2205 PetscFunctionBegin; 2206 *rinfo = mumps->id.RINFO(icntl); 2207 PetscFunctionReturn(0); 2208 } 2209 2210 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog) 2211 { 2212 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2213 2214 PetscFunctionBegin; 2215 *rinfog = mumps->id.RINFOG(icntl); 2216 PetscFunctionReturn(0); 2217 } 2218 2219 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS) 2220 { 2221 PetscErrorCode ierr; 2222 Mat Bt = NULL,Btseq = NULL; 2223 PetscBool flg; 2224 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2225 PetscScalar *aa; 2226 PetscInt spnr,*ia,*ja; 2227 2228 PetscFunctionBegin; 2229 PetscValidIntPointer(spRHS,2); 2230 ierr = PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);CHKERRQ(ierr); 2231 if (flg) { 2232 ierr = MatTransposeGetMat(spRHS,&Bt);CHKERRQ(ierr); 2233 } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be type MATTRANSPOSEMAT matrix"); 2234 2235 ierr = MatMumpsSetIcntl(F,30,1);CHKERRQ(ierr); 2236 2237 if (mumps->petsc_size > 1) { 2238 Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data; 2239 Btseq = b->A; 2240 } else { 2241 Btseq = Bt; 2242 } 2243 2244 if (!mumps->myid) { 2245 ierr = MatSeqAIJGetArray(Btseq,&aa);CHKERRQ(ierr); 2246 ierr = MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2247 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2248 2249 mumps->id.irhs_ptr = ia; 2250 mumps->id.irhs_sparse = ja; 2251 mumps->id.nz_rhs = ia[spnr] - 1; 2252 mumps->id.rhs_sparse = (MumpsScalar*)aa; 2253 } else { 2254 mumps->id.irhs_ptr = NULL; 2255 mumps->id.irhs_sparse = NULL; 2256 mumps->id.nz_rhs = 0; 2257 mumps->id.rhs_sparse = NULL; 2258 } 2259 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2260 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2261 2262 /* solve phase */ 2263 /*-------------*/ 2264 mumps->id.job = JOB_SOLVE; 2265 PetscMUMPS_c(mumps); 2266 if (mumps->id.INFOG(1) < 0) 2267 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 2268 2269 if (!mumps->myid) { 2270 ierr = MatSeqAIJRestoreArray(Btseq,&aa);CHKERRQ(ierr); 2271 ierr = MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2272 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2273 } 2274 PetscFunctionReturn(0); 2275 } 2276 2277 /*@ 2278 MatMumpsGetInverse - Get user-specified set of entries in inverse of A 2279 2280 Logically Collective on Mat 2281 2282 Input Parameters: 2283 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2284 - spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0] 2285 2286 Output Parameter: 2287 . spRHS - requested entries of inverse of A 2288 2289 Level: beginner 2290 2291 References: 2292 . MUMPS Users' Guide 2293 2294 .seealso: MatGetFactor(), MatCreateTranspose() 2295 @*/ 2296 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS) 2297 { 2298 PetscErrorCode ierr; 2299 2300 PetscFunctionBegin; 2301 PetscValidType(F,1); 2302 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2303 ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr); 2304 PetscFunctionReturn(0); 2305 } 2306 2307 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST) 2308 { 2309 PetscErrorCode ierr; 2310 Mat spRHS; 2311 2312 PetscFunctionBegin; 2313 ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr); 2314 ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr); 2315 ierr = MatDestroy(&spRHS);CHKERRQ(ierr); 2316 PetscFunctionReturn(0); 2317 } 2318 2319 /*@ 2320 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T 2321 2322 Logically Collective on Mat 2323 2324 Input Parameters: 2325 + F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface 2326 - spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0] 2327 2328 Output Parameter: 2329 . spRHST - requested entries of inverse of A^T 2330 2331 Level: beginner 2332 2333 References: 2334 . MUMPS Users' Guide 2335 2336 .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse() 2337 @*/ 2338 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST) 2339 { 2340 PetscErrorCode ierr; 2341 PetscBool flg; 2342 2343 PetscFunctionBegin; 2344 PetscValidType(F,1); 2345 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2346 ierr = PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 2347 if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix"); 2348 2349 ierr = PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));CHKERRQ(ierr); 2350 PetscFunctionReturn(0); 2351 } 2352 2353 /*@ 2354 MatMumpsGetInfo - Get MUMPS parameter INFO() 2355 2356 Logically Collective on Mat 2357 2358 Input Parameters: 2359 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2360 - icntl - index of MUMPS parameter array INFO() 2361 2362 Output Parameter: 2363 . ival - value of MUMPS INFO(icntl) 2364 2365 Level: beginner 2366 2367 References: 2368 . MUMPS Users' Guide 2369 2370 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2371 @*/ 2372 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival) 2373 { 2374 PetscErrorCode ierr; 2375 2376 PetscFunctionBegin; 2377 PetscValidType(F,1); 2378 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2379 PetscValidIntPointer(ival,3); 2380 ierr = PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2381 PetscFunctionReturn(0); 2382 } 2383 2384 /*@ 2385 MatMumpsGetInfog - Get MUMPS parameter INFOG() 2386 2387 Logically Collective on Mat 2388 2389 Input Parameters: 2390 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2391 - icntl - index of MUMPS parameter array INFOG() 2392 2393 Output Parameter: 2394 . ival - value of MUMPS INFOG(icntl) 2395 2396 Level: beginner 2397 2398 References: 2399 . MUMPS Users' Guide 2400 2401 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2402 @*/ 2403 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival) 2404 { 2405 PetscErrorCode ierr; 2406 2407 PetscFunctionBegin; 2408 PetscValidType(F,1); 2409 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2410 PetscValidIntPointer(ival,3); 2411 ierr = PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2412 PetscFunctionReturn(0); 2413 } 2414 2415 /*@ 2416 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 2417 2418 Logically Collective on Mat 2419 2420 Input Parameters: 2421 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2422 - icntl - index of MUMPS parameter array RINFO() 2423 2424 Output Parameter: 2425 . val - value of MUMPS RINFO(icntl) 2426 2427 Level: beginner 2428 2429 References: 2430 . MUMPS Users' Guide 2431 2432 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2433 @*/ 2434 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val) 2435 { 2436 PetscErrorCode ierr; 2437 2438 PetscFunctionBegin; 2439 PetscValidType(F,1); 2440 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2441 PetscValidRealPointer(val,3); 2442 ierr = PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2443 PetscFunctionReturn(0); 2444 } 2445 2446 /*@ 2447 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 2448 2449 Logically Collective on Mat 2450 2451 Input Parameters: 2452 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2453 - icntl - index of MUMPS parameter array RINFOG() 2454 2455 Output Parameter: 2456 . val - value of MUMPS RINFOG(icntl) 2457 2458 Level: beginner 2459 2460 References: 2461 . MUMPS Users' Guide 2462 2463 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2464 @*/ 2465 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val) 2466 { 2467 PetscErrorCode ierr; 2468 2469 PetscFunctionBegin; 2470 PetscValidType(F,1); 2471 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2472 PetscValidRealPointer(val,3); 2473 ierr = PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2474 PetscFunctionReturn(0); 2475 } 2476 2477 /*MC 2478 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 2479 distributed and sequential matrices via the external package MUMPS. 2480 2481 Works with MATAIJ and MATSBAIJ matrices 2482 2483 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 2484 2485 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below. 2486 2487 Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver 2488 2489 Options Database Keys: 2490 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 2491 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 2492 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 2493 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 2494 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 2495 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis 2496 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 2497 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 2498 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 2499 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 2500 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 2501 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 2502 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 2503 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 2504 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can allocate per processor 2505 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 2506 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 2507 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix 2508 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering 2509 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 2510 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 2511 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 2512 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 2513 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 2514 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 2515 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 2516 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 2517 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 2518 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 2519 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 2520 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 2521 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 2522 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 2523 Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 2524 2525 Level: beginner 2526 2527 Notes: 2528 MUMPS Cholesky does not handle (complex) Hermitian matrices http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf so using it will error if the matrix is Hermitian. 2529 2530 When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling 2531 $ KSPGetPC(ksp,&pc); 2532 $ PCFactorGetMatrix(pc,&mat); 2533 $ MatMumpsGetInfo(mat,....); 2534 $ MatMumpsGetInfog(mat,....); etc. 2535 Or you can run with -ksp_error_if_not_converged and the program will be stopped and the information printed in the error message. 2536 2537 Two modes to run MUMPS/PETSc with OpenMP 2538 2539 $ Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 2540 $ threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". 2541 2542 $ -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example, 2543 $ if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 2544 2545 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 2546 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc 2547 (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 2548 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 2549 (PETSc will automatically try to utilized a threaded BLAS if --with-openmp is provided). 2550 2551 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 2552 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 2553 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 2554 are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set 2555 by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs. 2556 In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets, 2557 if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind 2558 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half 2559 cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 2560 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 2561 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 2562 examine the mapping result. 2563 2564 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts, 2565 for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc 2566 calls omp_set_num_threads(m) internally before calling MUMPS. 2567 2568 References: 2569 + 1. - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 2570 - 2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017. 2571 2572 .seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCGetFactor(), PCFactorGetMatrix() 2573 2574 M*/ 2575 2576 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type) 2577 { 2578 PetscFunctionBegin; 2579 *type = MATSOLVERMUMPS; 2580 PetscFunctionReturn(0); 2581 } 2582 2583 /* MatGetFactor for Seq and MPI AIJ matrices */ 2584 static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F) 2585 { 2586 Mat B; 2587 PetscErrorCode ierr; 2588 Mat_MUMPS *mumps; 2589 PetscBool isSeqAIJ; 2590 2591 PetscFunctionBegin; 2592 /* Create the factorization matrix */ 2593 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr); 2594 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2595 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2596 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2597 ierr = MatSetUp(B);CHKERRQ(ierr); 2598 2599 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2600 2601 B->ops->view = MatView_MUMPS; 2602 B->ops->getinfo = MatGetInfo_MUMPS; 2603 2604 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2605 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2606 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2607 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2608 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2609 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2610 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2611 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2612 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2613 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2614 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2615 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2616 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2617 2618 if (ftype == MAT_FACTOR_LU) { 2619 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 2620 B->factortype = MAT_FACTOR_LU; 2621 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 2622 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 2623 mumps->sym = 0; 2624 } else { 2625 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 2626 B->factortype = MAT_FACTOR_CHOLESKY; 2627 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 2628 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 2629 #if defined(PETSC_USE_COMPLEX) 2630 mumps->sym = 2; 2631 #else 2632 if (A->spd_set && A->spd) mumps->sym = 1; 2633 else mumps->sym = 2; 2634 #endif 2635 } 2636 2637 /* set solvertype */ 2638 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2639 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2640 2641 B->ops->destroy = MatDestroy_MUMPS; 2642 B->data = (void*)mumps; 2643 2644 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2645 2646 *F = B; 2647 PetscFunctionReturn(0); 2648 } 2649 2650 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 2651 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F) 2652 { 2653 Mat B; 2654 PetscErrorCode ierr; 2655 Mat_MUMPS *mumps; 2656 PetscBool isSeqSBAIJ; 2657 2658 PetscFunctionBegin; 2659 if (ftype != MAT_FACTOR_CHOLESKY) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Cannot use PETSc SBAIJ matrices with MUMPS LU, use AIJ matrix"); 2660 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr); 2661 /* Create the factorization matrix */ 2662 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2663 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2664 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2665 ierr = MatSetUp(B);CHKERRQ(ierr); 2666 2667 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2668 if (isSeqSBAIJ) { 2669 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 2670 } else { 2671 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 2672 } 2673 2674 B->ops->getinfo = MatGetInfo_External; 2675 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 2676 B->ops->view = MatView_MUMPS; 2677 2678 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2679 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2680 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2681 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2682 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2683 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2684 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2685 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2686 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2687 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2688 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2689 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2690 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2691 2692 B->factortype = MAT_FACTOR_CHOLESKY; 2693 #if defined(PETSC_USE_COMPLEX) 2694 mumps->sym = 2; 2695 #else 2696 if (A->spd_set && A->spd) mumps->sym = 1; 2697 else mumps->sym = 2; 2698 #endif 2699 2700 /* set solvertype */ 2701 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2702 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2703 2704 B->ops->destroy = MatDestroy_MUMPS; 2705 B->data = (void*)mumps; 2706 2707 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2708 2709 *F = B; 2710 PetscFunctionReturn(0); 2711 } 2712 2713 static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F) 2714 { 2715 Mat B; 2716 PetscErrorCode ierr; 2717 Mat_MUMPS *mumps; 2718 PetscBool isSeqBAIJ; 2719 2720 PetscFunctionBegin; 2721 /* Create the factorization matrix */ 2722 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr); 2723 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2724 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2725 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2726 ierr = MatSetUp(B);CHKERRQ(ierr); 2727 2728 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2729 if (ftype == MAT_FACTOR_LU) { 2730 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 2731 B->factortype = MAT_FACTOR_LU; 2732 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 2733 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 2734 mumps->sym = 0; 2735 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead\n"); 2736 2737 B->ops->getinfo = MatGetInfo_External; 2738 B->ops->view = MatView_MUMPS; 2739 2740 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2741 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2742 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2743 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2744 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2745 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2746 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2747 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2748 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2749 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2750 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2751 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2752 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2753 2754 /* set solvertype */ 2755 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2756 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2757 2758 B->ops->destroy = MatDestroy_MUMPS; 2759 B->data = (void*)mumps; 2760 2761 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2762 2763 *F = B; 2764 PetscFunctionReturn(0); 2765 } 2766 2767 /* MatGetFactor for Seq and MPI SELL matrices */ 2768 static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F) 2769 { 2770 Mat B; 2771 PetscErrorCode ierr; 2772 Mat_MUMPS *mumps; 2773 PetscBool isSeqSELL; 2774 2775 PetscFunctionBegin; 2776 /* Create the factorization matrix */ 2777 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);CHKERRQ(ierr); 2778 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2779 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2780 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2781 ierr = MatSetUp(B);CHKERRQ(ierr); 2782 2783 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2784 2785 B->ops->view = MatView_MUMPS; 2786 B->ops->getinfo = MatGetInfo_MUMPS; 2787 2788 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2789 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2790 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2791 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2792 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2793 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2794 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2795 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2796 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2797 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2798 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2799 2800 if (ftype == MAT_FACTOR_LU) { 2801 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 2802 B->factortype = MAT_FACTOR_LU; 2803 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 2804 else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 2805 mumps->sym = 0; 2806 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 2807 2808 /* set solvertype */ 2809 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2810 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2811 2812 B->ops->destroy = MatDestroy_MUMPS; 2813 B->data = (void*)mumps; 2814 2815 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2816 2817 *F = B; 2818 PetscFunctionReturn(0); 2819 } 2820 2821 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 2822 { 2823 PetscErrorCode ierr; 2824 2825 PetscFunctionBegin; 2826 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2827 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2828 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2829 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2830 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 2831 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2832 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2833 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2834 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2835 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 2836 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);CHKERRQ(ierr); 2837 PetscFunctionReturn(0); 2838 } 2839 2840