1 2 /* 3 Provides an interface to the MUMPS sparse solver 4 */ 5 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 8 #include <../src/mat/impls/sell/mpi/mpisell.h> 9 10 EXTERN_C_BEGIN 11 #if defined(PETSC_USE_COMPLEX) 12 #if defined(PETSC_USE_REAL_SINGLE) 13 #include <cmumps_c.h> 14 #else 15 #include <zmumps_c.h> 16 #endif 17 #else 18 #if defined(PETSC_USE_REAL_SINGLE) 19 #include <smumps_c.h> 20 #else 21 #include <dmumps_c.h> 22 #endif 23 #endif 24 EXTERN_C_END 25 #define JOB_INIT -1 26 #define JOB_FACTSYMBOLIC 1 27 #define JOB_FACTNUMERIC 2 28 #define JOB_SOLVE 3 29 #define JOB_END -2 30 31 /* calls to MUMPS */ 32 #if defined(PETSC_USE_COMPLEX) 33 #if defined(PETSC_USE_REAL_SINGLE) 34 #define MUMPS_c cmumps_c 35 #else 36 #define MUMPS_c zmumps_c 37 #endif 38 #else 39 #if defined(PETSC_USE_REAL_SINGLE) 40 #define MUMPS_c smumps_c 41 #else 42 #define MUMPS_c dmumps_c 43 #endif 44 #endif 45 46 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */ 47 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 48 #define PetscMUMPS_c(mumps) \ 49 do { \ 50 if (mumps->use_petsc_omp_support) { \ 51 if (mumps->is_omp_master) { \ 52 ierr = PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl);CHKERRQ(ierr); \ 53 MUMPS_c(&mumps->id); \ 54 ierr = PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl);CHKERRQ(ierr); \ 55 } \ 56 ierr = PetscOmpCtrlBarrier(mumps->omp_ctrl);CHKERRQ(ierr); \ 57 } else { \ 58 MUMPS_c(&mumps->id); \ 59 } \ 60 } while(0) 61 #else 62 #define PetscMUMPS_c(mumps) \ 63 do { MUMPS_c(&mumps->id); } while (0) 64 #endif 65 66 /* declare MumpsScalar */ 67 #if defined(PETSC_USE_COMPLEX) 68 #if defined(PETSC_USE_REAL_SINGLE) 69 #define MumpsScalar mumps_complex 70 #else 71 #define MumpsScalar mumps_double_complex 72 #endif 73 #else 74 #define MumpsScalar PetscScalar 75 #endif 76 77 /* macros s.t. indices match MUMPS documentation */ 78 #define ICNTL(I) icntl[(I)-1] 79 #define CNTL(I) cntl[(I)-1] 80 #define INFOG(I) infog[(I)-1] 81 #define INFO(I) info[(I)-1] 82 #define RINFOG(I) rinfog[(I)-1] 83 #define RINFO(I) rinfo[(I)-1] 84 85 typedef struct { 86 #if defined(PETSC_USE_COMPLEX) 87 #if defined(PETSC_USE_REAL_SINGLE) 88 CMUMPS_STRUC_C id; 89 #else 90 ZMUMPS_STRUC_C id; 91 #endif 92 #else 93 #if defined(PETSC_USE_REAL_SINGLE) 94 SMUMPS_STRUC_C id; 95 #else 96 DMUMPS_STRUC_C id; 97 #endif 98 #endif 99 100 MatStructure matstruc; 101 PetscMPIInt myid,petsc_size; 102 PetscInt *irn,*jcn,nz,sym; 103 PetscScalar *val; 104 MPI_Comm mumps_comm; 105 PetscInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 106 VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 107 Vec b_seq,x_seq; 108 PetscInt ninfo,*info; /* display INFO */ 109 PetscInt sizeredrhs; 110 PetscScalar *schur_sol; 111 PetscInt schur_sizesol; 112 113 PetscBool use_petsc_omp_support; 114 PetscOmpCtrl omp_ctrl; /* an OpenMP controler that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 115 MPI_Comm petsc_comm,omp_comm; /* petsc_comm is petsc matrix's comm */ 116 PetscMPIInt mpinz; /* on master rank, nz = sum(mpinz) over omp_comm; on other ranks, mpinz = nz*/ 117 PetscMPIInt omp_comm_size; 118 PetscBool is_omp_master; /* is this rank the master of omp_comm */ 119 PetscMPIInt *recvcount,*displs; 120 121 PetscErrorCode (*ConvertToTriples)(Mat, int, MatReuse, int*, int**, int**, PetscScalar**); 122 } Mat_MUMPS; 123 124 extern PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*); 125 126 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps) 127 { 128 PetscErrorCode ierr; 129 130 PetscFunctionBegin; 131 ierr = PetscFree2(mumps->id.listvar_schur,mumps->id.schur);CHKERRQ(ierr); 132 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 133 ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr); 134 mumps->id.size_schur = 0; 135 mumps->id.schur_lld = 0; 136 mumps->id.ICNTL(19) = 0; 137 PetscFunctionReturn(0); 138 } 139 140 /* solve with rhs in mumps->id.redrhs and return in the same location */ 141 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 142 { 143 Mat_MUMPS *mumps=(Mat_MUMPS*)F->data; 144 Mat S,B,X; 145 MatFactorSchurStatus schurstatus; 146 PetscInt sizesol; 147 PetscErrorCode ierr; 148 149 PetscFunctionBegin; 150 ierr = MatFactorFactorizeSchurComplement(F);CHKERRQ(ierr); 151 ierr = MatFactorGetSchurComplement(F,&S,&schurstatus);CHKERRQ(ierr); 152 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);CHKERRQ(ierr); 153 switch (schurstatus) { 154 case MAT_FACTOR_SCHUR_FACTORED: 155 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);CHKERRQ(ierr); 156 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 157 ierr = MatMatSolveTranspose(S,B,X);CHKERRQ(ierr); 158 } else { 159 ierr = MatMatSolve(S,B,X);CHKERRQ(ierr); 160 } 161 break; 162 case MAT_FACTOR_SCHUR_INVERTED: 163 sizesol = mumps->id.nrhs*mumps->id.size_schur; 164 if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 165 ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr); 166 ierr = PetscMalloc1(sizesol,&mumps->schur_sol);CHKERRQ(ierr); 167 mumps->schur_sizesol = sizesol; 168 } 169 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);CHKERRQ(ierr); 170 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 171 ierr = MatTransposeMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr); 172 } else { 173 ierr = MatMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr); 174 } 175 ierr = MatCopy(X,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 176 break; 177 default: 178 SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %D",F->schur_status); 179 break; 180 } 181 ierr = MatFactorRestoreSchurComplement(F,&S,schurstatus);CHKERRQ(ierr); 182 ierr = MatDestroy(&B);CHKERRQ(ierr); 183 ierr = MatDestroy(&X);CHKERRQ(ierr); 184 PetscFunctionReturn(0); 185 } 186 187 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 188 { 189 Mat_MUMPS *mumps=(Mat_MUMPS*)F->data; 190 PetscErrorCode ierr; 191 192 PetscFunctionBegin; 193 if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 194 PetscFunctionReturn(0); 195 } 196 if (!expansion) { /* prepare for the condensation step */ 197 PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur; 198 /* allocate MUMPS internal array to store reduced right-hand sides */ 199 if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 200 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 201 mumps->id.lredrhs = mumps->id.size_schur; 202 ierr = PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);CHKERRQ(ierr); 203 mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs; 204 } 205 mumps->id.ICNTL(26) = 1; /* condensation phase */ 206 } else { /* prepare for the expansion step */ 207 /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */ 208 ierr = MatMumpsSolveSchur_Private(F);CHKERRQ(ierr); 209 mumps->id.ICNTL(26) = 2; /* expansion phase */ 210 PetscMUMPS_c(mumps); 211 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 212 /* restore defaults */ 213 mumps->id.ICNTL(26) = -1; 214 /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 215 if (mumps->id.nrhs > 1) { 216 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 217 mumps->id.lredrhs = 0; 218 mumps->sizeredrhs = 0; 219 } 220 } 221 PetscFunctionReturn(0); 222 } 223 224 /* 225 MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz] 226 227 input: 228 A - matrix in aij,baij or sbaij format 229 shift - 0: C style output triple; 1: Fortran style output triple. 230 reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 231 MAT_REUSE_MATRIX: only the values in v array are updated 232 output: 233 nnz - dim of r, c, and v (number of local nonzero entries of A) 234 r, c, v - row and col index, matrix values (matrix triples) 235 236 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 237 freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 238 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 239 240 */ 241 242 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 243 { 244 const PetscInt *ai,*aj,*ajj,M=A->rmap->n; 245 PetscInt nz,rnz,i,j; 246 PetscErrorCode ierr; 247 PetscInt *row,*col; 248 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 249 250 PetscFunctionBegin; 251 *v=aa->a; 252 if (reuse == MAT_INITIAL_MATRIX) { 253 nz = aa->nz; 254 ai = aa->i; 255 aj = aa->j; 256 *nnz = nz; 257 ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr); 258 col = row + nz; 259 260 nz = 0; 261 for (i=0; i<M; i++) { 262 rnz = ai[i+1] - ai[i]; 263 ajj = aj + ai[i]; 264 for (j=0; j<rnz; j++) { 265 row[nz] = i+shift; col[nz++] = ajj[j] + shift; 266 } 267 } 268 *r = row; *c = col; 269 } 270 PetscFunctionReturn(0); 271 } 272 273 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 274 { 275 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 276 PetscInt *ptr; 277 278 PetscFunctionBegin; 279 *v = a->val; 280 if (reuse == MAT_INITIAL_MATRIX) { 281 PetscInt nz,i,j,row; 282 PetscErrorCode ierr; 283 284 nz = a->sliidx[a->totalslices]; 285 *nnz = nz; 286 ierr = PetscMalloc1(2*nz, &ptr);CHKERRQ(ierr); 287 *r = ptr; 288 *c = ptr + nz; 289 290 for (i=0; i<a->totalslices; i++) { 291 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 292 *ptr++ = 8*i + row + shift; 293 } 294 } 295 for (i=0;i<nz;i++) *ptr++ = a->colidx[i] + shift; 296 } 297 PetscFunctionReturn(0); 298 } 299 300 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 301 { 302 Mat_SeqBAIJ *aa=(Mat_SeqBAIJ*)A->data; 303 const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2; 304 PetscInt bs,M,nz,idx=0,rnz,i,j,k,m; 305 PetscErrorCode ierr; 306 PetscInt *row,*col; 307 308 PetscFunctionBegin; 309 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 310 M = A->rmap->N/bs; 311 *v = aa->a; 312 if (reuse == MAT_INITIAL_MATRIX) { 313 ai = aa->i; aj = aa->j; 314 nz = bs2*aa->nz; 315 *nnz = nz; 316 ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr); 317 col = row + nz; 318 319 for (i=0; i<M; i++) { 320 ajj = aj + ai[i]; 321 rnz = ai[i+1] - ai[i]; 322 for (k=0; k<rnz; k++) { 323 for (j=0; j<bs; j++) { 324 for (m=0; m<bs; m++) { 325 row[idx] = i*bs + m + shift; 326 col[idx++] = bs*(ajj[k]) + j + shift; 327 } 328 } 329 } 330 } 331 *r = row; *c = col; 332 } 333 PetscFunctionReturn(0); 334 } 335 336 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 337 { 338 const PetscInt *ai, *aj,*ajj; 339 PetscInt nz,rnz,i,j,k,m,bs; 340 PetscErrorCode ierr; 341 PetscInt *row,*col; 342 PetscScalar *val; 343 Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)A->data; 344 const PetscInt bs2=aa->bs2,mbs=aa->mbs; 345 #if defined(PETSC_USE_COMPLEX) 346 PetscBool hermitian; 347 #endif 348 349 PetscFunctionBegin; 350 #if defined(PETSC_USE_COMPLEX) 351 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 352 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 353 #endif 354 ai = aa->i; 355 aj = aa->j; 356 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 357 if (reuse == MAT_INITIAL_MATRIX) { 358 nz = aa->nz; 359 ierr = PetscMalloc((2*bs2*nz*sizeof(PetscInt)+(bs>1?bs2*nz*sizeof(PetscScalar):0)), &row);CHKERRQ(ierr); 360 col = row + bs2*nz; 361 if (bs>1) 362 val = (PetscScalar*)(col + bs2*nz); 363 else 364 val = aa->a; 365 366 *r = row; *c = col; *v = val; 367 } else { 368 row = *r; col = *c; val = *v; 369 } 370 371 nz = 0; 372 if (bs>1) { 373 for (i=0; i<mbs; i++) { 374 rnz = ai[i+1] - ai[i]; 375 ajj = aj + ai[i]; 376 for (j=0; j<rnz; j++) { 377 for (k=0; k<bs; k++) { 378 for (m=0; m<bs; m++) { 379 if (ajj[j]>i || m>=k) { 380 if (reuse == MAT_INITIAL_MATRIX) { 381 row[nz] = i*bs + m + shift; 382 col[nz] = ajj[j]*bs + k + shift; 383 } 384 val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs]; 385 } 386 } 387 } 388 } 389 } 390 } else if (reuse == MAT_INITIAL_MATRIX) { 391 for (i=0; i<mbs; i++) { 392 rnz = ai[i+1] - ai[i]; 393 ajj = aj + ai[i]; 394 for (j=0; j<rnz; j++) { 395 row[nz] = i+shift; col[nz++] = ajj[j] + shift; 396 } 397 } 398 if (nz != aa->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %D != %D",nz,aa->nz); 399 } 400 *nnz = nz; 401 PetscFunctionReturn(0); 402 } 403 404 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 405 { 406 const PetscInt *ai,*aj,*ajj,*adiag,M=A->rmap->n; 407 PetscInt nz,rnz,i,j; 408 const PetscScalar *av,*v1; 409 PetscScalar *val; 410 PetscErrorCode ierr; 411 PetscInt *row,*col; 412 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 413 PetscBool missing; 414 #if defined(PETSC_USE_COMPLEX) 415 PetscBool hermitian; 416 #endif 417 418 PetscFunctionBegin; 419 #if defined(PETSC_USE_COMPLEX) 420 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 421 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 422 #endif 423 ai = aa->i; aj = aa->j; av = aa->a; 424 adiag = aa->diag; 425 ierr = MatMissingDiagonal_SeqAIJ(A,&missing,&i);CHKERRQ(ierr); 426 if (reuse == MAT_INITIAL_MATRIX) { 427 /* count nz in the upper triangular part of A */ 428 nz = 0; 429 if (missing) { 430 for (i=0; i<M; i++) { 431 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 432 for (j=ai[i];j<ai[i+1];j++) { 433 if (aj[j] < i) continue; 434 nz++; 435 } 436 } else { 437 nz += ai[i+1] - adiag[i]; 438 } 439 } 440 } else { 441 for (i=0; i<M; i++) nz += ai[i+1] - adiag[i]; 442 } 443 *nnz = nz; 444 445 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 446 col = row + nz; 447 val = (PetscScalar*)(col + nz); 448 449 nz = 0; 450 if (missing) { 451 for (i=0; i<M; i++) { 452 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 453 for (j=ai[i];j<ai[i+1];j++) { 454 if (aj[j] < i) continue; 455 row[nz] = i+shift; 456 col[nz] = aj[j]+shift; 457 val[nz] = av[j]; 458 nz++; 459 } 460 } else { 461 rnz = ai[i+1] - adiag[i]; 462 ajj = aj + adiag[i]; 463 v1 = av + adiag[i]; 464 for (j=0; j<rnz; j++) { 465 row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j]; 466 } 467 } 468 } 469 } else { 470 for (i=0; i<M; i++) { 471 rnz = ai[i+1] - adiag[i]; 472 ajj = aj + adiag[i]; 473 v1 = av + adiag[i]; 474 for (j=0; j<rnz; j++) { 475 row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j]; 476 } 477 } 478 } 479 *r = row; *c = col; *v = val; 480 } else { 481 nz = 0; val = *v; 482 if (missing) { 483 for (i=0; i <M; i++) { 484 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 485 for (j=ai[i];j<ai[i+1];j++) { 486 if (aj[j] < i) continue; 487 val[nz++] = av[j]; 488 } 489 } else { 490 rnz = ai[i+1] - adiag[i]; 491 v1 = av + adiag[i]; 492 for (j=0; j<rnz; j++) { 493 val[nz++] = v1[j]; 494 } 495 } 496 } 497 } else { 498 for (i=0; i <M; i++) { 499 rnz = ai[i+1] - adiag[i]; 500 v1 = av + adiag[i]; 501 for (j=0; j<rnz; j++) { 502 val[nz++] = v1[j]; 503 } 504 } 505 } 506 } 507 PetscFunctionReturn(0); 508 } 509 510 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 511 { 512 const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 513 PetscErrorCode ierr; 514 PetscInt rstart,nz,i,j,jj,irow,countA,countB; 515 PetscInt *row,*col; 516 const PetscScalar *av, *bv,*v1,*v2; 517 PetscScalar *val; 518 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data; 519 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)(mat->A)->data; 520 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 521 #if defined(PETSC_USE_COMPLEX) 522 PetscBool hermitian; 523 #endif 524 525 PetscFunctionBegin; 526 #if defined(PETSC_USE_COMPLEX) 527 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 528 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 529 #endif 530 rstart = A->rmap->rstart; 531 ai = aa->i; 532 aj = aa->j; 533 bi = bb->i; 534 bj = bb->j; 535 av = aa->a; 536 bv = bb->a; 537 538 garray = mat->garray; 539 540 if (reuse == MAT_INITIAL_MATRIX) { 541 nz = aa->nz + bb->nz; 542 *nnz = nz; 543 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 544 col = row + nz; 545 val = (PetscScalar*)(col + nz); 546 547 *r = row; *c = col; *v = val; 548 } else { 549 row = *r; col = *c; val = *v; 550 } 551 552 jj = 0; irow = rstart; 553 for (i=0; i<m; i++) { 554 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 555 countA = ai[i+1] - ai[i]; 556 countB = bi[i+1] - bi[i]; 557 bjj = bj + bi[i]; 558 v1 = av + ai[i]; 559 v2 = bv + bi[i]; 560 561 /* A-part */ 562 for (j=0; j<countA; j++) { 563 if (reuse == MAT_INITIAL_MATRIX) { 564 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 565 } 566 val[jj++] = v1[j]; 567 } 568 569 /* B-part */ 570 for (j=0; j < countB; j++) { 571 if (reuse == MAT_INITIAL_MATRIX) { 572 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 573 } 574 val[jj++] = v2[j]; 575 } 576 irow++; 577 } 578 PetscFunctionReturn(0); 579 } 580 581 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 582 { 583 const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 584 PetscErrorCode ierr; 585 PetscInt rstart,nz,i,j,jj,irow,countA,countB; 586 PetscInt *row,*col; 587 const PetscScalar *av, *bv,*v1,*v2; 588 PetscScalar *val; 589 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 590 Mat_SeqAIJ *aa = (Mat_SeqAIJ*)(mat->A)->data; 591 Mat_SeqAIJ *bb = (Mat_SeqAIJ*)(mat->B)->data; 592 593 PetscFunctionBegin; 594 rstart = A->rmap->rstart; 595 ai = aa->i; 596 aj = aa->j; 597 bi = bb->i; 598 bj = bb->j; 599 av = aa->a; 600 bv = bb->a; 601 602 garray = mat->garray; 603 604 if (reuse == MAT_INITIAL_MATRIX) { 605 nz = aa->nz + bb->nz; 606 *nnz = nz; 607 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 608 col = row + nz; 609 val = (PetscScalar*)(col + nz); 610 611 *r = row; *c = col; *v = val; 612 } else { 613 row = *r; col = *c; val = *v; 614 } 615 616 jj = 0; irow = rstart; 617 for (i=0; i<m; i++) { 618 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 619 countA = ai[i+1] - ai[i]; 620 countB = bi[i+1] - bi[i]; 621 bjj = bj + bi[i]; 622 v1 = av + ai[i]; 623 v2 = bv + bi[i]; 624 625 /* A-part */ 626 for (j=0; j<countA; j++) { 627 if (reuse == MAT_INITIAL_MATRIX) { 628 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 629 } 630 val[jj++] = v1[j]; 631 } 632 633 /* B-part */ 634 for (j=0; j < countB; j++) { 635 if (reuse == MAT_INITIAL_MATRIX) { 636 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 637 } 638 val[jj++] = v2[j]; 639 } 640 irow++; 641 } 642 PetscFunctionReturn(0); 643 } 644 645 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 646 { 647 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)A->data; 648 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ*)(mat->A)->data; 649 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 650 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj; 651 const PetscInt *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart; 652 const PetscInt bs2=mat->bs2; 653 PetscErrorCode ierr; 654 PetscInt bs,nz,i,j,k,n,jj,irow,countA,countB,idx; 655 PetscInt *row,*col; 656 const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2; 657 PetscScalar *val; 658 659 PetscFunctionBegin; 660 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 661 if (reuse == MAT_INITIAL_MATRIX) { 662 nz = bs2*(aa->nz + bb->nz); 663 *nnz = nz; 664 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 665 col = row + nz; 666 val = (PetscScalar*)(col + nz); 667 668 *r = row; *c = col; *v = val; 669 } else { 670 row = *r; col = *c; val = *v; 671 } 672 673 jj = 0; irow = rstart; 674 for (i=0; i<mbs; i++) { 675 countA = ai[i+1] - ai[i]; 676 countB = bi[i+1] - bi[i]; 677 ajj = aj + ai[i]; 678 bjj = bj + bi[i]; 679 v1 = av + bs2*ai[i]; 680 v2 = bv + bs2*bi[i]; 681 682 idx = 0; 683 /* A-part */ 684 for (k=0; k<countA; k++) { 685 for (j=0; j<bs; j++) { 686 for (n=0; n<bs; n++) { 687 if (reuse == MAT_INITIAL_MATRIX) { 688 row[jj] = irow + n + shift; 689 col[jj] = rstart + bs*ajj[k] + j + shift; 690 } 691 val[jj++] = v1[idx++]; 692 } 693 } 694 } 695 696 idx = 0; 697 /* B-part */ 698 for (k=0; k<countB; k++) { 699 for (j=0; j<bs; j++) { 700 for (n=0; n<bs; n++) { 701 if (reuse == MAT_INITIAL_MATRIX) { 702 row[jj] = irow + n + shift; 703 col[jj] = bs*garray[bjj[k]] + j + shift; 704 } 705 val[jj++] = v2[idx++]; 706 } 707 } 708 } 709 irow += bs; 710 } 711 PetscFunctionReturn(0); 712 } 713 714 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 715 { 716 const PetscInt *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 717 PetscErrorCode ierr; 718 PetscInt rstart,nz,nza,nzb,i,j,jj,irow,countA,countB; 719 PetscInt *row,*col; 720 const PetscScalar *av, *bv,*v1,*v2; 721 PetscScalar *val; 722 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 723 Mat_SeqAIJ *aa =(Mat_SeqAIJ*)(mat->A)->data; 724 Mat_SeqAIJ *bb =(Mat_SeqAIJ*)(mat->B)->data; 725 #if defined(PETSC_USE_COMPLEX) 726 PetscBool hermitian; 727 #endif 728 729 PetscFunctionBegin; 730 #if defined(PETSC_USE_COMPLEX) 731 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 732 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 733 #endif 734 ai = aa->i; 735 aj = aa->j; 736 adiag = aa->diag; 737 bi = bb->i; 738 bj = bb->j; 739 garray = mat->garray; 740 av = aa->a; 741 bv = bb->a; 742 743 rstart = A->rmap->rstart; 744 745 if (reuse == MAT_INITIAL_MATRIX) { 746 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 747 nzb = 0; /* num of upper triangular entries in mat->B */ 748 for (i=0; i<m; i++) { 749 nza += (ai[i+1] - adiag[i]); 750 countB = bi[i+1] - bi[i]; 751 bjj = bj + bi[i]; 752 for (j=0; j<countB; j++) { 753 if (garray[bjj[j]] > rstart) nzb++; 754 } 755 } 756 757 nz = nza + nzb; /* total nz of upper triangular part of mat */ 758 *nnz = nz; 759 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 760 col = row + nz; 761 val = (PetscScalar*)(col + nz); 762 763 *r = row; *c = col; *v = val; 764 } else { 765 row = *r; col = *c; val = *v; 766 } 767 768 jj = 0; irow = rstart; 769 for (i=0; i<m; i++) { 770 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 771 v1 = av + adiag[i]; 772 countA = ai[i+1] - adiag[i]; 773 countB = bi[i+1] - bi[i]; 774 bjj = bj + bi[i]; 775 v2 = bv + bi[i]; 776 777 /* A-part */ 778 for (j=0; j<countA; j++) { 779 if (reuse == MAT_INITIAL_MATRIX) { 780 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 781 } 782 val[jj++] = v1[j]; 783 } 784 785 /* B-part */ 786 for (j=0; j < countB; j++) { 787 if (garray[bjj[j]] > rstart) { 788 if (reuse == MAT_INITIAL_MATRIX) { 789 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 790 } 791 val[jj++] = v2[j]; 792 } 793 } 794 irow++; 795 } 796 PetscFunctionReturn(0); 797 } 798 799 PetscErrorCode MatDestroy_MUMPS(Mat A) 800 { 801 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 802 PetscErrorCode ierr; 803 804 PetscFunctionBegin; 805 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 806 ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr); 807 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 808 ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr); 809 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 810 ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr); 811 ierr = PetscFree(mumps->irn);CHKERRQ(ierr); 812 ierr = PetscFree(mumps->info);CHKERRQ(ierr); 813 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 814 mumps->id.job = JOB_END; 815 PetscMUMPS_c(mumps); 816 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 817 if (mumps->use_petsc_omp_support) { ierr = PetscOmpCtrlDestroy(&mumps->omp_ctrl);CHKERRQ(ierr); } 818 #endif 819 ierr = PetscFree2(mumps->recvcount,mumps->displs);CHKERRQ(ierr); 820 ierr = PetscFree(A->data);CHKERRQ(ierr); 821 822 /* clear composed functions */ 823 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr); 824 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr); 825 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr); 826 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr); 827 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);CHKERRQ(ierr); 828 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr); 829 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);CHKERRQ(ierr); 830 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);CHKERRQ(ierr); 831 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);CHKERRQ(ierr); 832 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);CHKERRQ(ierr); 833 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);CHKERRQ(ierr); 834 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);CHKERRQ(ierr); 835 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);CHKERRQ(ierr); 836 PetscFunctionReturn(0); 837 } 838 839 PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x) 840 { 841 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 842 PetscScalar *array; 843 Vec b_seq; 844 IS is_iden,is_petsc; 845 PetscErrorCode ierr; 846 PetscInt i; 847 PetscBool second_solve = PETSC_FALSE; 848 static PetscBool cite1 = PETSC_FALSE,cite2 = PETSC_FALSE; 849 850 PetscFunctionBegin; 851 ierr = PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1);CHKERRQ(ierr); 852 ierr = PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",&cite2);CHKERRQ(ierr); 853 854 if (A->factorerrortype) { 855 ierr = PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 856 ierr = VecSetInf(x);CHKERRQ(ierr); 857 PetscFunctionReturn(0); 858 } 859 860 mumps->id.ICNTL(20)= 0; /* dense RHS */ 861 mumps->id.nrhs = 1; 862 b_seq = mumps->b_seq; 863 if (mumps->petsc_size > 1) { 864 /* MUMPS only supports centralized rhs. Scatter b into a seqential rhs vector */ 865 ierr = VecScatterBegin(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 866 ierr = VecScatterEnd(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 867 if (!mumps->myid) {ierr = VecGetArray(b_seq,&array);CHKERRQ(ierr);} 868 } else { /* petsc_size == 1 */ 869 ierr = VecCopy(b,x);CHKERRQ(ierr); 870 ierr = VecGetArray(x,&array);CHKERRQ(ierr); 871 } 872 if (!mumps->myid) { /* define rhs on the host */ 873 mumps->id.nrhs = 1; 874 mumps->id.rhs = (MumpsScalar*)array; 875 } 876 877 /* 878 handle condensation step of Schur complement (if any) 879 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 880 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 881 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 882 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 883 */ 884 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 885 if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n"); 886 second_solve = PETSC_TRUE; 887 ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr); 888 } 889 /* solve phase */ 890 /*-------------*/ 891 mumps->id.job = JOB_SOLVE; 892 PetscMUMPS_c(mumps); 893 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 894 895 /* handle expansion step of Schur complement (if any) */ 896 if (second_solve) { 897 ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr); 898 } 899 900 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 901 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 902 /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 903 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 904 } 905 if (!mumps->scat_sol) { /* create scatter scat_sol */ 906 ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */ 907 for (i=0; i<mumps->id.lsol_loc; i++) { 908 mumps->id.isol_loc[i] -= 1; /* change Fortran style to C style */ 909 } 910 ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,mumps->id.isol_loc,PETSC_COPY_VALUES,&is_petsc);CHKERRQ(ierr); /* to */ 911 ierr = VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr); 912 ierr = ISDestroy(&is_iden);CHKERRQ(ierr); 913 ierr = ISDestroy(&is_petsc);CHKERRQ(ierr); 914 915 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 916 } 917 918 ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 919 ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 920 } 921 922 if (mumps->petsc_size > 1) {if (!mumps->myid) {ierr = VecRestoreArray(b_seq,&array);CHKERRQ(ierr);}} 923 else {ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);} 924 925 ierr = PetscLogFlops(2.0*mumps->id.RINFO(3));CHKERRQ(ierr); 926 PetscFunctionReturn(0); 927 } 928 929 PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x) 930 { 931 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 932 PetscErrorCode ierr; 933 934 PetscFunctionBegin; 935 mumps->id.ICNTL(9) = 0; 936 ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr); 937 mumps->id.ICNTL(9) = 1; 938 PetscFunctionReturn(0); 939 } 940 941 PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X) 942 { 943 PetscErrorCode ierr; 944 Mat Bt = NULL; 945 PetscBool flg, flgT; 946 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 947 PetscInt i,nrhs,M; 948 PetscScalar *array; 949 const PetscScalar *rbray; 950 PetscInt lsol_loc,nlsol_loc,*isol_loc,*idxx,*isol_loc_save,iidx = 0; 951 PetscScalar *bray,*sol_loc,*sol_loc_save; 952 IS is_to,is_from; 953 PetscInt k,proc,j,m,myrstart; 954 const PetscInt *rstart; 955 Vec v_mpi,b_seq,msol_loc; 956 VecScatter scat_rhs,scat_sol; 957 PetscScalar *aa; 958 PetscInt spnr,*ia,*ja; 959 Mat_MPIAIJ *b = NULL; 960 961 PetscFunctionBegin; 962 ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 963 if (!flg) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix"); 964 965 ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 966 if (flg) { /* dense B */ 967 if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution"); 968 mumps->id.ICNTL(20)= 0; /* dense RHS */ 969 } else { /* sparse B */ 970 ierr = PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);CHKERRQ(ierr); 971 if (flgT) { /* input B is transpose of actural RHS matrix, 972 because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */ 973 ierr = MatTransposeGetMat(B,&Bt);CHKERRQ(ierr); 974 } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix"); 975 mumps->id.ICNTL(20)= 1; /* sparse RHS */ 976 } 977 978 ierr = MatGetSize(B,&M,&nrhs);CHKERRQ(ierr); 979 mumps->id.nrhs = nrhs; 980 mumps->id.lrhs = M; 981 mumps->id.rhs = NULL; 982 983 if (mumps->petsc_size == 1) { 984 PetscScalar *aa; 985 PetscInt spnr,*ia,*ja; 986 PetscBool second_solve = PETSC_FALSE; 987 988 ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr); 989 mumps->id.rhs = (MumpsScalar*)array; 990 991 if (!Bt) { /* dense B */ 992 /* copy B to X */ 993 ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr); 994 ierr = PetscArraycpy(array,rbray,M*nrhs);CHKERRQ(ierr); 995 ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr); 996 } else { /* sparse B */ 997 ierr = MatSeqAIJGetArray(Bt,&aa);CHKERRQ(ierr); 998 ierr = MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 999 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 1000 /* mumps requires ia and ja start at 1! */ 1001 mumps->id.irhs_ptr = ia; 1002 mumps->id.irhs_sparse = ja; 1003 mumps->id.nz_rhs = ia[spnr] - 1; 1004 mumps->id.rhs_sparse = (MumpsScalar*)aa; 1005 } 1006 /* handle condensation step of Schur complement (if any) */ 1007 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1008 second_solve = PETSC_TRUE; 1009 ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr); 1010 } 1011 /* solve phase */ 1012 /*-------------*/ 1013 mumps->id.job = JOB_SOLVE; 1014 PetscMUMPS_c(mumps); 1015 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1016 1017 /* handle expansion step of Schur complement (if any) */ 1018 if (second_solve) { 1019 ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr); 1020 } 1021 if (Bt) { /* sparse B */ 1022 ierr = MatSeqAIJRestoreArray(Bt,&aa);CHKERRQ(ierr); 1023 ierr = MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1024 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 1025 } 1026 ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr); 1027 PetscFunctionReturn(0); 1028 } 1029 1030 /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/ 1031 if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n"); 1032 1033 /* create msol_loc to hold mumps local solution */ 1034 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1035 sol_loc_save = (PetscScalar*)mumps->id.sol_loc; 1036 1037 lsol_loc = mumps->id.lsol_loc; 1038 nlsol_loc = nrhs*lsol_loc; /* length of sol_loc */ 1039 ierr = PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);CHKERRQ(ierr); 1040 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1041 mumps->id.isol_loc = isol_loc; 1042 1043 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);CHKERRQ(ierr); 1044 1045 if (!Bt) { /* dense B */ 1046 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1047 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1048 0, re-arrange B into desired order, which is a local operation. 1049 */ 1050 1051 /* scatter v_mpi to b_seq because MUMPS only supports centralized rhs */ 1052 /* wrap dense rhs matrix B into a vector v_mpi */ 1053 ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr); 1054 ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr); 1055 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr); 1056 ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr); 1057 1058 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */ 1059 if (!mumps->myid) { 1060 PetscInt *idx; 1061 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1062 ierr = PetscMalloc1(nrhs*M,&idx);CHKERRQ(ierr); 1063 ierr = MatGetOwnershipRanges(B,&rstart);CHKERRQ(ierr); 1064 k = 0; 1065 for (proc=0; proc<mumps->petsc_size; proc++){ 1066 for (j=0; j<nrhs; j++){ 1067 for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i; 1068 } 1069 } 1070 1071 ierr = VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);CHKERRQ(ierr); 1072 ierr = ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr); 1073 ierr = ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);CHKERRQ(ierr); 1074 } else { 1075 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);CHKERRQ(ierr); 1076 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);CHKERRQ(ierr); 1077 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);CHKERRQ(ierr); 1078 } 1079 ierr = VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);CHKERRQ(ierr); 1080 ierr = VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1081 ierr = ISDestroy(&is_to);CHKERRQ(ierr); 1082 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1083 ierr = VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1084 1085 if (!mumps->myid) { /* define rhs on the host */ 1086 ierr = VecGetArray(b_seq,&bray);CHKERRQ(ierr); 1087 mumps->id.rhs = (MumpsScalar*)bray; 1088 ierr = VecRestoreArray(b_seq,&bray);CHKERRQ(ierr); 1089 } 1090 1091 } else { /* sparse B */ 1092 b = (Mat_MPIAIJ*)Bt->data; 1093 1094 /* wrap dense X into a vector v_mpi */ 1095 ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr); 1096 ierr = MatDenseGetArray(X,&bray);CHKERRQ(ierr); 1097 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr); 1098 ierr = MatDenseRestoreArray(X,&bray);CHKERRQ(ierr); 1099 1100 if (!mumps->myid) { 1101 ierr = MatSeqAIJGetArray(b->A,&aa);CHKERRQ(ierr); 1102 ierr = MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1103 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 1104 /* mumps requires ia and ja start at 1! */ 1105 mumps->id.irhs_ptr = ia; 1106 mumps->id.irhs_sparse = ja; 1107 mumps->id.nz_rhs = ia[spnr] - 1; 1108 mumps->id.rhs_sparse = (MumpsScalar*)aa; 1109 } else { 1110 mumps->id.irhs_ptr = NULL; 1111 mumps->id.irhs_sparse = NULL; 1112 mumps->id.nz_rhs = 0; 1113 mumps->id.rhs_sparse = NULL; 1114 } 1115 } 1116 1117 /* solve phase */ 1118 /*-------------*/ 1119 mumps->id.job = JOB_SOLVE; 1120 PetscMUMPS_c(mumps); 1121 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1122 1123 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1124 ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr); 1125 ierr = VecPlaceArray(v_mpi,array);CHKERRQ(ierr); 1126 1127 /* create scatter scat_sol */ 1128 ierr = MatGetOwnershipRanges(X,&rstart);CHKERRQ(ierr); 1129 /* iidx: index for scatter mumps solution to petsc X */ 1130 1131 ierr = ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);CHKERRQ(ierr); 1132 ierr = PetscMalloc1(nlsol_loc,&idxx);CHKERRQ(ierr); 1133 for (i=0; i<lsol_loc; i++) { 1134 isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1135 1136 for (proc=0; proc<mumps->petsc_size; proc++){ 1137 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) { 1138 myrstart = rstart[proc]; 1139 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1140 iidx = k + myrstart*nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1141 m = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */ 1142 break; 1143 } 1144 } 1145 1146 for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m; 1147 } 1148 ierr = ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr); 1149 ierr = VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);CHKERRQ(ierr); 1150 ierr = VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1151 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1152 ierr = ISDestroy(&is_to);CHKERRQ(ierr); 1153 ierr = VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1154 ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr); 1155 1156 /* free spaces */ 1157 mumps->id.sol_loc = (MumpsScalar*)sol_loc_save; 1158 mumps->id.isol_loc = isol_loc_save; 1159 1160 ierr = PetscFree2(sol_loc,isol_loc);CHKERRQ(ierr); 1161 ierr = PetscFree(idxx);CHKERRQ(ierr); 1162 ierr = VecDestroy(&msol_loc);CHKERRQ(ierr); 1163 ierr = VecDestroy(&v_mpi);CHKERRQ(ierr); 1164 if (Bt) { 1165 if (!mumps->myid) { 1166 b = (Mat_MPIAIJ*)Bt->data; 1167 ierr = MatSeqAIJRestoreArray(b->A,&aa);CHKERRQ(ierr); 1168 ierr = MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1169 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 1170 } 1171 } else { 1172 ierr = VecDestroy(&b_seq);CHKERRQ(ierr); 1173 ierr = VecScatterDestroy(&scat_rhs);CHKERRQ(ierr); 1174 } 1175 ierr = VecScatterDestroy(&scat_sol);CHKERRQ(ierr); 1176 ierr = PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));CHKERRQ(ierr); 1177 PetscFunctionReturn(0); 1178 } 1179 1180 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X) 1181 { 1182 PetscErrorCode ierr; 1183 PetscBool flg; 1184 Mat B; 1185 1186 PetscFunctionBegin; 1187 ierr = PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 1188 if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix"); 1189 1190 /* Create B=Bt^T that uses Bt's data structure */ 1191 ierr = MatCreateTranspose(Bt,&B);CHKERRQ(ierr); 1192 1193 ierr = MatMatSolve_MUMPS(A,B,X);CHKERRQ(ierr); 1194 ierr = MatDestroy(&B);CHKERRQ(ierr); 1195 PetscFunctionReturn(0); 1196 } 1197 1198 #if !defined(PETSC_USE_COMPLEX) 1199 /* 1200 input: 1201 F: numeric factor 1202 output: 1203 nneg: total number of negative pivots 1204 nzero: total number of zero pivots 1205 npos: (global dimension of F) - nneg - nzero 1206 */ 1207 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos) 1208 { 1209 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1210 PetscErrorCode ierr; 1211 PetscMPIInt size; 1212 1213 PetscFunctionBegin; 1214 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRQ(ierr); 1215 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1216 if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",mumps->id.INFOG(13)); 1217 1218 if (nneg) *nneg = mumps->id.INFOG(12); 1219 if (nzero || npos) { 1220 if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1221 if (nzero) *nzero = mumps->id.INFOG(28); 1222 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1223 } 1224 PetscFunctionReturn(0); 1225 } 1226 #endif 1227 1228 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps) 1229 { 1230 PetscErrorCode ierr; 1231 PetscInt i,nz=0,*irn,*jcn=0; 1232 PetscScalar *val=0; 1233 PetscMPIInt mpinz,*recvcount=NULL,*displs=NULL; 1234 1235 PetscFunctionBegin; 1236 if (mumps->omp_comm_size > 1) { 1237 if (reuse == MAT_INITIAL_MATRIX) { 1238 /* master first gathers counts of nonzeros to receive */ 1239 if (mumps->is_omp_master) { ierr = PetscMalloc2(mumps->omp_comm_size,&recvcount,mumps->omp_comm_size,&displs);CHKERRQ(ierr); } 1240 ierr = PetscMPIIntCast(mumps->nz,&mpinz);CHKERRQ(ierr); 1241 ierr = MPI_Gather(&mpinz,1,MPI_INT,recvcount,1,MPI_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1242 1243 /* master allocates memory to receive nonzeros */ 1244 if (mumps->is_omp_master) { 1245 displs[0] = 0; 1246 for (i=1; i<mumps->omp_comm_size; i++) displs[i] = displs[i-1] + recvcount[i-1]; 1247 nz = displs[mumps->omp_comm_size-1] + recvcount[mumps->omp_comm_size-1]; 1248 ierr = PetscMalloc(2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar),&irn);CHKERRQ(ierr); 1249 jcn = irn + nz; 1250 val = (PetscScalar*)(jcn + nz); 1251 } 1252 1253 /* save the gatherv plan */ 1254 mumps->mpinz = mpinz; /* used as send count */ 1255 mumps->recvcount = recvcount; 1256 mumps->displs = displs; 1257 1258 /* master gathers nonzeros */ 1259 ierr = MPI_Gatherv(mumps->irn,mpinz,MPIU_INT,irn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1260 ierr = MPI_Gatherv(mumps->jcn,mpinz,MPIU_INT,jcn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1261 ierr = MPI_Gatherv(mumps->val,mpinz,MPIU_SCALAR,val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1262 1263 /* master frees its row/col/val and replaces them with bigger arrays */ 1264 if (mumps->is_omp_master) { 1265 ierr = PetscFree(mumps->irn);CHKERRQ(ierr); /* irn/jcn/val are allocated together so free only irn */ 1266 mumps->nz = nz; /* it is a sum of mpinz over omp_comm */ 1267 mumps->irn = irn; 1268 mumps->jcn = jcn; 1269 mumps->val = val; 1270 } 1271 } else { 1272 ierr = MPI_Gatherv((mumps->is_omp_master?MPI_IN_PLACE:mumps->val),mumps->mpinz,MPIU_SCALAR,mumps->val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1273 } 1274 } 1275 PetscFunctionReturn(0); 1276 } 1277 1278 PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info) 1279 { 1280 Mat_MUMPS *mumps =(Mat_MUMPS*)(F)->data; 1281 PetscErrorCode ierr; 1282 PetscBool isMPIAIJ; 1283 1284 PetscFunctionBegin; 1285 if (mumps->id.INFOG(1) < 0) { 1286 if (mumps->id.INFOG(1) == -6) { 1287 ierr = PetscInfo2(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1288 } 1289 ierr = PetscInfo2(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1290 PetscFunctionReturn(0); 1291 } 1292 1293 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1294 ierr = MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);CHKERRQ(ierr); 1295 1296 /* numerical factorization phase */ 1297 /*-------------------------------*/ 1298 mumps->id.job = JOB_FACTNUMERIC; 1299 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1300 if (!mumps->myid) { 1301 mumps->id.a = (MumpsScalar*)mumps->val; 1302 } 1303 } else { 1304 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1305 } 1306 PetscMUMPS_c(mumps); 1307 if (mumps->id.INFOG(1) < 0) { 1308 if (A->erroriffailure) { 1309 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 1310 } else { 1311 if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */ 1312 ierr = PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1313 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1314 } else if (mumps->id.INFOG(1) == -13) { 1315 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1316 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1317 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10) ) { 1318 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray \n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1319 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1320 } else { 1321 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1322 F->factorerrortype = MAT_FACTOR_OTHER; 1323 } 1324 } 1325 } 1326 if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d\n",mumps->id.INFOG(16)); 1327 1328 F->assembled = PETSC_TRUE; 1329 mumps->matstruc = SAME_NONZERO_PATTERN; 1330 if (F->schur) { /* reset Schur status to unfactored */ 1331 #if defined(PETSC_HAVE_CUDA) 1332 F->schur->valid_GPU_matrix = PETSC_OFFLOAD_CPU; 1333 #endif 1334 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1335 mumps->id.ICNTL(19) = 2; 1336 ierr = MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);CHKERRQ(ierr); 1337 } 1338 ierr = MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);CHKERRQ(ierr); 1339 } 1340 1341 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 1342 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 1343 1344 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 1345 if (mumps->petsc_size > 1) { 1346 PetscInt lsol_loc; 1347 PetscScalar *sol_loc; 1348 1349 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr); 1350 1351 /* distributed solution; Create x_seq=sol_loc for repeated use */ 1352 if (mumps->x_seq) { 1353 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 1354 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 1355 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 1356 } 1357 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1358 ierr = PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);CHKERRQ(ierr); 1359 mumps->id.lsol_loc = lsol_loc; 1360 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1361 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr); 1362 } 1363 ierr = PetscLogFlops(mumps->id.RINFO(2));CHKERRQ(ierr); 1364 PetscFunctionReturn(0); 1365 } 1366 1367 /* Sets MUMPS options from the options database */ 1368 PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A) 1369 { 1370 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1371 PetscErrorCode ierr; 1372 PetscInt icntl,info[80],i,ninfo=80; 1373 PetscBool flg; 1374 1375 PetscFunctionBegin; 1376 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 1377 ierr = PetscOptionsInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr); 1378 if (flg) mumps->id.ICNTL(1) = icntl; 1379 ierr = PetscOptionsInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr); 1380 if (flg) mumps->id.ICNTL(2) = icntl; 1381 ierr = PetscOptionsInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr); 1382 if (flg) mumps->id.ICNTL(3) = icntl; 1383 1384 ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr); 1385 if (flg) mumps->id.ICNTL(4) = icntl; 1386 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 1387 1388 ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr); 1389 if (flg) mumps->id.ICNTL(6) = icntl; 1390 1391 ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr); 1392 if (flg) { 1393 if (icntl== 1 && mumps->petsc_size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n"); 1394 else mumps->id.ICNTL(7) = icntl; 1395 } 1396 1397 ierr = PetscOptionsInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr); 1398 /* ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL);CHKERRQ(ierr); handled by MatSolveTranspose_MUMPS() */ 1399 ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr); 1400 ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr); 1401 ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr); 1402 ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr); 1403 ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr); 1404 ierr = PetscOptionsInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr); 1405 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 1406 ierr = MatDestroy(&F->schur);CHKERRQ(ierr); 1407 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 1408 } 1409 /* ierr = PetscOptionsInt("-mat_mumps_icntl_20","ICNTL(20): the format (dense or sparse) of the right-hand sides","None",mumps->id.ICNTL(20),&mumps->id.ICNTL(20),NULL);CHKERRQ(ierr); -- sparse rhs is not supported in PETSc API */ 1410 /* ierr = PetscOptionsInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL);CHKERRQ(ierr); we only use distributed solution vector */ 1411 1412 ierr = PetscOptionsInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr); 1413 ierr = PetscOptionsInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr); 1414 ierr = PetscOptionsInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr); 1415 if (mumps->id.ICNTL(24)) { 1416 mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ 1417 } 1418 1419 ierr = PetscOptionsInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr); 1420 ierr = PetscOptionsInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr); 1421 ierr = PetscOptionsInt("-mat_mumps_icntl_27","ICNTL(27): the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr); 1422 ierr = PetscOptionsInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr); 1423 ierr = PetscOptionsInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr); 1424 /* ierr = PetscOptionsInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */ 1425 ierr = PetscOptionsInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr); 1426 /* ierr = PetscOptionsInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr); -- not supported by PETSc API */ 1427 ierr = PetscOptionsInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr); 1428 ierr = PetscOptionsInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr); 1429 ierr = PetscOptionsInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);CHKERRQ(ierr); 1430 ierr = PetscOptionsInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);CHKERRQ(ierr); 1431 1432 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr); 1433 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr); 1434 ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr); 1435 ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr); 1436 ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr); 1437 ierr = PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);CHKERRQ(ierr); 1438 1439 ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, 256, NULL);CHKERRQ(ierr); 1440 1441 ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr); 1442 if (ninfo) { 1443 if (ninfo > 80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %d must <= 80\n",ninfo); 1444 ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr); 1445 mumps->ninfo = ninfo; 1446 for (i=0; i<ninfo; i++) { 1447 if (info[i] < 0 || info[i]>80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %d must between 1 and 80\n",ninfo); 1448 else mumps->info[i] = info[i]; 1449 } 1450 } 1451 1452 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1453 PetscFunctionReturn(0); 1454 } 1455 1456 PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps) 1457 { 1458 PetscErrorCode ierr; 1459 PetscInt nthreads=0; 1460 1461 PetscFunctionBegin; 1462 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1463 ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRQ(ierr); 1464 ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRQ(ierr); /* so that code like "if (!myid)" still works even if mumps_comm is different */ 1465 1466 ierr = PetscOptionsHasName(NULL,NULL,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr); 1467 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1468 ierr = PetscOptionsGetInt(NULL,NULL,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr); 1469 if (mumps->use_petsc_omp_support) { 1470 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1471 ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr); 1472 ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr); 1473 #else 1474 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -mat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual\n"); 1475 #endif 1476 } else { 1477 mumps->omp_comm = PETSC_COMM_SELF; 1478 mumps->mumps_comm = mumps->petsc_comm; 1479 mumps->is_omp_master = PETSC_TRUE; 1480 } 1481 ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRQ(ierr); 1482 1483 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 1484 mumps->id.job = JOB_INIT; 1485 mumps->id.par = 1; /* host participates factorizaton and solve */ 1486 mumps->id.sym = mumps->sym; 1487 1488 PetscMUMPS_c(mumps); 1489 1490 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 1491 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 1492 */ 1493 ierr = MPI_Bcast(mumps->id.icntl,60,MPIU_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 Manual Section 9 */ 1494 ierr = MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); 1495 1496 mumps->scat_rhs = NULL; 1497 mumps->scat_sol = NULL; 1498 1499 /* set PETSc-MUMPS default options - override MUMPS default */ 1500 mumps->id.ICNTL(3) = 0; 1501 mumps->id.ICNTL(4) = 0; 1502 if (mumps->petsc_size == 1) { 1503 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 1504 } else { 1505 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 1506 mumps->id.ICNTL(20) = 0; /* rhs is in dense format */ 1507 mumps->id.ICNTL(21) = 1; /* distributed solution */ 1508 } 1509 1510 /* schur */ 1511 mumps->id.size_schur = 0; 1512 mumps->id.listvar_schur = NULL; 1513 mumps->id.schur = NULL; 1514 mumps->sizeredrhs = 0; 1515 mumps->schur_sol = NULL; 1516 mumps->schur_sizesol = 0; 1517 PetscFunctionReturn(0); 1518 } 1519 1520 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps) 1521 { 1522 PetscErrorCode ierr; 1523 1524 PetscFunctionBegin; 1525 ierr = MPI_Bcast(mumps->id.infog, 80,MPIU_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 manual p82 */ 1526 ierr = MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); 1527 if (mumps->id.INFOG(1) < 0) { 1528 if (A->erroriffailure) { 1529 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1530 } else { 1531 if (mumps->id.INFOG(1) == -6) { 1532 ierr = PetscInfo2(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1533 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 1534 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 1535 ierr = PetscInfo2(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1536 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1537 } else { 1538 ierr = PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1539 F->factorerrortype = MAT_FACTOR_OTHER; 1540 } 1541 } 1542 } 1543 PetscFunctionReturn(0); 1544 } 1545 1546 /* Note Petsc r(=c) permutation is used when mumps->id.ICNTL(7)==1 with centralized assembled matrix input; otherwise r and c are ignored */ 1547 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1548 { 1549 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1550 PetscErrorCode ierr; 1551 Vec b; 1552 const PetscInt M = A->rmap->N; 1553 1554 PetscFunctionBegin; 1555 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1556 1557 /* Set MUMPS options from the options database */ 1558 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1559 1560 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1561 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1562 1563 /* analysis phase */ 1564 /*----------------*/ 1565 mumps->id.job = JOB_FACTSYMBOLIC; 1566 mumps->id.n = M; 1567 switch (mumps->id.ICNTL(18)) { 1568 case 0: /* centralized assembled matrix input */ 1569 if (!mumps->myid) { 1570 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1571 if (mumps->id.ICNTL(6)>1) { 1572 mumps->id.a = (MumpsScalar*)mumps->val; 1573 } 1574 if (mumps->id.ICNTL(7) == 1) { /* use user-provide matrix ordering - assuming r = c ordering */ 1575 /* 1576 PetscBool flag; 1577 ierr = ISEqual(r,c,&flag);CHKERRQ(ierr); 1578 if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"row_perm != col_perm"); 1579 ierr = ISView(r,PETSC_VIEWER_STDOUT_SELF); 1580 */ 1581 if (!mumps->myid) { 1582 const PetscInt *idx; 1583 PetscInt i,*perm_in; 1584 1585 ierr = PetscMalloc1(M,&perm_in);CHKERRQ(ierr); 1586 ierr = ISGetIndices(r,&idx);CHKERRQ(ierr); 1587 1588 mumps->id.perm_in = perm_in; 1589 for (i=0; i<M; i++) perm_in[i] = idx[i]+1; /* perm_in[]: start from 1, not 0! */ 1590 ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr); 1591 } 1592 } 1593 } 1594 break; 1595 case 3: /* distributed assembled matrix input (size>1) */ 1596 mumps->id.nz_loc = mumps->nz; 1597 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1598 if (mumps->id.ICNTL(6)>1) { 1599 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1600 } 1601 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1602 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1603 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1604 ierr = VecDestroy(&b);CHKERRQ(ierr); 1605 break; 1606 } 1607 PetscMUMPS_c(mumps); 1608 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1609 1610 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1611 F->ops->solve = MatSolve_MUMPS; 1612 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1613 F->ops->matsolve = MatMatSolve_MUMPS; 1614 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1615 PetscFunctionReturn(0); 1616 } 1617 1618 /* Note the Petsc r and c permutations are ignored */ 1619 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1620 { 1621 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1622 PetscErrorCode ierr; 1623 Vec b; 1624 const PetscInt M = A->rmap->N; 1625 1626 PetscFunctionBegin; 1627 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1628 1629 /* Set MUMPS options from the options database */ 1630 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1631 1632 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1633 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1634 1635 /* analysis phase */ 1636 /*----------------*/ 1637 mumps->id.job = JOB_FACTSYMBOLIC; 1638 mumps->id.n = M; 1639 switch (mumps->id.ICNTL(18)) { 1640 case 0: /* centralized assembled matrix input */ 1641 if (!mumps->myid) { 1642 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1643 if (mumps->id.ICNTL(6)>1) { 1644 mumps->id.a = (MumpsScalar*)mumps->val; 1645 } 1646 } 1647 break; 1648 case 3: /* distributed assembled matrix input (size>1) */ 1649 mumps->id.nz_loc = mumps->nz; 1650 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1651 if (mumps->id.ICNTL(6)>1) { 1652 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1653 } 1654 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1655 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1656 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1657 ierr = VecDestroy(&b);CHKERRQ(ierr); 1658 break; 1659 } 1660 PetscMUMPS_c(mumps); 1661 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1662 1663 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1664 F->ops->solve = MatSolve_MUMPS; 1665 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1666 PetscFunctionReturn(0); 1667 } 1668 1669 /* Note the Petsc r permutation and factor info are ignored */ 1670 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info) 1671 { 1672 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1673 PetscErrorCode ierr; 1674 Vec b; 1675 const PetscInt M = A->rmap->N; 1676 1677 PetscFunctionBegin; 1678 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1679 1680 /* Set MUMPS options from the options database */ 1681 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1682 1683 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1684 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1685 1686 /* analysis phase */ 1687 /*----------------*/ 1688 mumps->id.job = JOB_FACTSYMBOLIC; 1689 mumps->id.n = M; 1690 switch (mumps->id.ICNTL(18)) { 1691 case 0: /* centralized assembled matrix input */ 1692 if (!mumps->myid) { 1693 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1694 if (mumps->id.ICNTL(6)>1) { 1695 mumps->id.a = (MumpsScalar*)mumps->val; 1696 } 1697 } 1698 break; 1699 case 3: /* distributed assembled matrix input (size>1) */ 1700 mumps->id.nz_loc = mumps->nz; 1701 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1702 if (mumps->id.ICNTL(6)>1) { 1703 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1704 } 1705 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1706 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1707 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1708 ierr = VecDestroy(&b);CHKERRQ(ierr); 1709 break; 1710 } 1711 PetscMUMPS_c(mumps); 1712 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1713 1714 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 1715 F->ops->solve = MatSolve_MUMPS; 1716 F->ops->solvetranspose = MatSolve_MUMPS; 1717 F->ops->matsolve = MatMatSolve_MUMPS; 1718 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1719 #if defined(PETSC_USE_COMPLEX) 1720 F->ops->getinertia = NULL; 1721 #else 1722 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 1723 #endif 1724 PetscFunctionReturn(0); 1725 } 1726 1727 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) 1728 { 1729 PetscErrorCode ierr; 1730 PetscBool iascii; 1731 PetscViewerFormat format; 1732 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1733 1734 PetscFunctionBegin; 1735 /* check if matrix is mumps type */ 1736 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0); 1737 1738 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1739 if (iascii) { 1740 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1741 if (format == PETSC_VIEWER_ASCII_INFO) { 1742 ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr); 1743 ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",mumps->id.sym);CHKERRQ(ierr); 1744 ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",mumps->id.par);CHKERRQ(ierr); 1745 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d \n",mumps->id.ICNTL(1));CHKERRQ(ierr); 1746 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d \n",mumps->id.ICNTL(2));CHKERRQ(ierr); 1747 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d \n",mumps->id.ICNTL(3));CHKERRQ(ierr); 1748 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",mumps->id.ICNTL(4));CHKERRQ(ierr); 1749 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",mumps->id.ICNTL(5));CHKERRQ(ierr); 1750 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",mumps->id.ICNTL(6));CHKERRQ(ierr); 1751 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d \n",mumps->id.ICNTL(7));CHKERRQ(ierr); 1752 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d \n",mumps->id.ICNTL(8));CHKERRQ(ierr); 1753 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",mumps->id.ICNTL(10));CHKERRQ(ierr); 1754 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",mumps->id.ICNTL(11));CHKERRQ(ierr); 1755 if (mumps->id.ICNTL(11)>0) { 1756 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr); 1757 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr); 1758 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6));CHKERRQ(ierr); 1759 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr); 1760 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr); 1761 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr); 1762 } 1763 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",mumps->id.ICNTL(12));CHKERRQ(ierr); 1764 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (efficiency control): %d \n",mumps->id.ICNTL(13));CHKERRQ(ierr); 1765 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",mumps->id.ICNTL(14));CHKERRQ(ierr); 1766 /* ICNTL(15-17) not used */ 1767 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",mumps->id.ICNTL(18));CHKERRQ(ierr); 1768 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d \n",mumps->id.ICNTL(19));CHKERRQ(ierr); 1769 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(20) (rhs sparse pattern): %d \n",mumps->id.ICNTL(20));CHKERRQ(ierr); 1770 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d \n",mumps->id.ICNTL(21));CHKERRQ(ierr); 1771 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d \n",mumps->id.ICNTL(22));CHKERRQ(ierr); 1772 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory can be allocated locally):%d \n",mumps->id.ICNTL(23));CHKERRQ(ierr); 1773 1774 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d \n",mumps->id.ICNTL(24));CHKERRQ(ierr); 1775 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d \n",mumps->id.ICNTL(25));CHKERRQ(ierr); 1776 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for rhs or solution): %d \n",mumps->id.ICNTL(26));CHKERRQ(ierr); 1777 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(27) (experimental parameter): %d \n",mumps->id.ICNTL(27));CHKERRQ(ierr); 1778 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d \n",mumps->id.ICNTL(28));CHKERRQ(ierr); 1779 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d \n",mumps->id.ICNTL(29));CHKERRQ(ierr); 1780 1781 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d \n",mumps->id.ICNTL(30));CHKERRQ(ierr); 1782 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(31) (factors is discarded in the solve phase): %d \n",mumps->id.ICNTL(31));CHKERRQ(ierr); 1783 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d \n",mumps->id.ICNTL(33));CHKERRQ(ierr); 1784 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d \n",mumps->id.ICNTL(35));CHKERRQ(ierr); 1785 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(36) (choice of BLR factorization variant): %d \n",mumps->id.ICNTL(36));CHKERRQ(ierr); 1786 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(38) (estimated compression rate of LU factors): %d \n",mumps->id.ICNTL(38));CHKERRQ(ierr); 1787 1788 ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1));CHKERRQ(ierr); 1789 ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr); 1790 ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3));CHKERRQ(ierr); 1791 ierr = PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4));CHKERRQ(ierr); 1792 ierr = PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5));CHKERRQ(ierr); 1793 ierr = PetscViewerASCIIPrintf(viewer," CNTL(7) (dropping parameter for BLR): %g \n",mumps->id.CNTL(7));CHKERRQ(ierr); 1794 1795 /* infomation local to each processor */ 1796 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr); 1797 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1798 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr); 1799 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1800 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr); 1801 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr); 1802 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1803 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr); 1804 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr); 1805 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1806 1807 ierr = PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr); 1808 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr); 1809 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1810 1811 ierr = PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr); 1812 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr); 1813 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1814 1815 ierr = PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr); 1816 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr); 1817 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1818 1819 if (mumps->ninfo && mumps->ninfo <= 80){ 1820 PetscInt i; 1821 for (i=0; i<mumps->ninfo; i++){ 1822 ierr = PetscViewerASCIIPrintf(viewer, " INFO(%d): \n",mumps->info[i]);CHKERRQ(ierr); 1823 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(mumps->info[i]));CHKERRQ(ierr); 1824 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1825 } 1826 } 1827 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1828 1829 if (!mumps->myid) { /* information from the host */ 1830 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr); 1831 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr); 1832 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr); 1833 ierr = PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr); 1834 1835 ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(3));CHKERRQ(ierr); 1836 ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(4));CHKERRQ(ierr); 1837 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",mumps->id.INFOG(5));CHKERRQ(ierr); 1838 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",mumps->id.INFOG(6));CHKERRQ(ierr); 1839 ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively use after analysis): %d \n",mumps->id.INFOG(7));CHKERRQ(ierr); 1840 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",mumps->id.INFOG(8));CHKERRQ(ierr); 1841 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",mumps->id.INFOG(9));CHKERRQ(ierr); 1842 ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d \n",mumps->id.INFOG(10));CHKERRQ(ierr); 1843 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d \n",mumps->id.INFOG(11));CHKERRQ(ierr); 1844 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",mumps->id.INFOG(12));CHKERRQ(ierr); 1845 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",mumps->id.INFOG(13));CHKERRQ(ierr); 1846 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",mumps->id.INFOG(14));CHKERRQ(ierr); 1847 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",mumps->id.INFOG(15));CHKERRQ(ierr); 1848 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",mumps->id.INFOG(16));CHKERRQ(ierr); 1849 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",mumps->id.INFOG(17));CHKERRQ(ierr); 1850 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",mumps->id.INFOG(18));CHKERRQ(ierr); 1851 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",mumps->id.INFOG(19));CHKERRQ(ierr); 1852 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",mumps->id.INFOG(20));CHKERRQ(ierr); 1853 ierr = PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",mumps->id.INFOG(21));CHKERRQ(ierr); 1854 ierr = PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",mumps->id.INFOG(22));CHKERRQ(ierr); 1855 ierr = PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",mumps->id.INFOG(23));CHKERRQ(ierr); 1856 ierr = PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",mumps->id.INFOG(24));CHKERRQ(ierr); 1857 ierr = PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",mumps->id.INFOG(25));CHKERRQ(ierr); 1858 ierr = PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));CHKERRQ(ierr); 1859 ierr = PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));CHKERRQ(ierr); 1860 ierr = PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));CHKERRQ(ierr); 1861 ierr = PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));CHKERRQ(ierr); 1862 ierr = PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));CHKERRQ(ierr); 1863 ierr = PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));CHKERRQ(ierr); 1864 ierr = PetscViewerASCIIPrintf(viewer," INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));CHKERRQ(ierr); 1865 ierr = PetscViewerASCIIPrintf(viewer," INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(36));CHKERRQ(ierr); 1866 ierr = PetscViewerASCIIPrintf(viewer," INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d \n",mumps->id.INFOG(37));CHKERRQ(ierr); 1867 ierr = PetscViewerASCIIPrintf(viewer," INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(38));CHKERRQ(ierr); 1868 ierr = PetscViewerASCIIPrintf(viewer," INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d \n",mumps->id.INFOG(39));CHKERRQ(ierr); 1869 } 1870 } 1871 } 1872 PetscFunctionReturn(0); 1873 } 1874 1875 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info) 1876 { 1877 Mat_MUMPS *mumps =(Mat_MUMPS*)A->data; 1878 1879 PetscFunctionBegin; 1880 info->block_size = 1.0; 1881 info->nz_allocated = mumps->id.INFOG(20); 1882 info->nz_used = mumps->id.INFOG(20); 1883 info->nz_unneeded = 0.0; 1884 info->assemblies = 0.0; 1885 info->mallocs = 0.0; 1886 info->memory = 0.0; 1887 info->fill_ratio_given = 0; 1888 info->fill_ratio_needed = 0; 1889 info->factor_mallocs = 0; 1890 PetscFunctionReturn(0); 1891 } 1892 1893 /* -------------------------------------------------------------------------------------------*/ 1894 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 1895 { 1896 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1897 const PetscInt *idxs; 1898 PetscInt size,i; 1899 PetscErrorCode ierr; 1900 1901 PetscFunctionBegin; 1902 ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr); 1903 if (mumps->petsc_size > 1) { 1904 PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */ 1905 1906 ls = mumps->myid ? (size ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 1907 ierr = MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);CHKERRQ(ierr); 1908 if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc\n"); 1909 } 1910 if (mumps->id.size_schur != size) { 1911 ierr = PetscFree2(mumps->id.listvar_schur,mumps->id.schur);CHKERRQ(ierr); 1912 mumps->id.size_schur = size; 1913 mumps->id.schur_lld = size; 1914 ierr = PetscMalloc2(size,&mumps->id.listvar_schur,size*size,&mumps->id.schur);CHKERRQ(ierr); 1915 } 1916 1917 /* Schur complement matrix */ 1918 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.size_schur,(PetscScalar*)mumps->id.schur,&F->schur);CHKERRQ(ierr); 1919 if (mumps->sym == 1) { 1920 ierr = MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 1921 } 1922 1923 /* MUMPS expects Fortran style indices */ 1924 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 1925 ierr = PetscArraycpy(mumps->id.listvar_schur,idxs,size);CHKERRQ(ierr); 1926 for (i=0;i<size;i++) mumps->id.listvar_schur[i]++; 1927 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 1928 if (mumps->petsc_size > 1) { 1929 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 1930 } else { 1931 if (F->factortype == MAT_FACTOR_LU) { 1932 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 1933 } else { 1934 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 1935 } 1936 } 1937 /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 1938 mumps->id.ICNTL(26) = -1; 1939 PetscFunctionReturn(0); 1940 } 1941 1942 /* -------------------------------------------------------------------------------------------*/ 1943 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S) 1944 { 1945 Mat St; 1946 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1947 PetscScalar *array; 1948 #if defined(PETSC_USE_COMPLEX) 1949 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 1950 #endif 1951 PetscErrorCode ierr; 1952 1953 PetscFunctionBegin; 1954 if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 1955 ierr = MatCreate(PETSC_COMM_SELF,&St);CHKERRQ(ierr); 1956 ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);CHKERRQ(ierr); 1957 ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr); 1958 ierr = MatSetUp(St);CHKERRQ(ierr); 1959 ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr); 1960 if (!mumps->sym) { /* MUMPS always return a full matrix */ 1961 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1962 PetscInt i,j,N=mumps->id.size_schur; 1963 for (i=0;i<N;i++) { 1964 for (j=0;j<N;j++) { 1965 #if !defined(PETSC_USE_COMPLEX) 1966 PetscScalar val = mumps->id.schur[i*N+j]; 1967 #else 1968 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 1969 #endif 1970 array[j*N+i] = val; 1971 } 1972 } 1973 } else { /* stored by columns */ 1974 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 1975 } 1976 } else { /* either full or lower-triangular (not packed) */ 1977 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 1978 PetscInt i,j,N=mumps->id.size_schur; 1979 for (i=0;i<N;i++) { 1980 for (j=i;j<N;j++) { 1981 #if !defined(PETSC_USE_COMPLEX) 1982 PetscScalar val = mumps->id.schur[i*N+j]; 1983 #else 1984 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 1985 #endif 1986 array[i*N+j] = val; 1987 array[j*N+i] = val; 1988 } 1989 } 1990 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 1991 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 1992 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 1993 PetscInt i,j,N=mumps->id.size_schur; 1994 for (i=0;i<N;i++) { 1995 for (j=0;j<i+1;j++) { 1996 #if !defined(PETSC_USE_COMPLEX) 1997 PetscScalar val = mumps->id.schur[i*N+j]; 1998 #else 1999 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2000 #endif 2001 array[i*N+j] = val; 2002 array[j*N+i] = val; 2003 } 2004 } 2005 } 2006 } 2007 ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr); 2008 *S = St; 2009 PetscFunctionReturn(0); 2010 } 2011 2012 /* -------------------------------------------------------------------------------------------*/ 2013 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival) 2014 { 2015 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2016 2017 PetscFunctionBegin; 2018 mumps->id.ICNTL(icntl) = ival; 2019 PetscFunctionReturn(0); 2020 } 2021 2022 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival) 2023 { 2024 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2025 2026 PetscFunctionBegin; 2027 *ival = mumps->id.ICNTL(icntl); 2028 PetscFunctionReturn(0); 2029 } 2030 2031 /*@ 2032 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2033 2034 Logically Collective on Mat 2035 2036 Input Parameters: 2037 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2038 . icntl - index of MUMPS parameter array ICNTL() 2039 - ival - value of MUMPS ICNTL(icntl) 2040 2041 Options Database: 2042 . -mat_mumps_icntl_<icntl> <ival> 2043 2044 Level: beginner 2045 2046 References: 2047 . MUMPS Users' Guide 2048 2049 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2050 @*/ 2051 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival) 2052 { 2053 PetscErrorCode ierr; 2054 2055 PetscFunctionBegin; 2056 PetscValidType(F,1); 2057 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2058 PetscValidLogicalCollectiveInt(F,icntl,2); 2059 PetscValidLogicalCollectiveInt(F,ival,3); 2060 ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr); 2061 PetscFunctionReturn(0); 2062 } 2063 2064 /*@ 2065 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2066 2067 Logically Collective on Mat 2068 2069 Input Parameters: 2070 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2071 - icntl - index of MUMPS parameter array ICNTL() 2072 2073 Output Parameter: 2074 . ival - value of MUMPS ICNTL(icntl) 2075 2076 Level: beginner 2077 2078 References: 2079 . MUMPS Users' Guide 2080 2081 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2082 @*/ 2083 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival) 2084 { 2085 PetscErrorCode ierr; 2086 2087 PetscFunctionBegin; 2088 PetscValidType(F,1); 2089 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2090 PetscValidLogicalCollectiveInt(F,icntl,2); 2091 PetscValidIntPointer(ival,3); 2092 ierr = PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2093 PetscFunctionReturn(0); 2094 } 2095 2096 /* -------------------------------------------------------------------------------------------*/ 2097 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val) 2098 { 2099 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2100 2101 PetscFunctionBegin; 2102 mumps->id.CNTL(icntl) = val; 2103 PetscFunctionReturn(0); 2104 } 2105 2106 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val) 2107 { 2108 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2109 2110 PetscFunctionBegin; 2111 *val = mumps->id.CNTL(icntl); 2112 PetscFunctionReturn(0); 2113 } 2114 2115 /*@ 2116 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2117 2118 Logically Collective on Mat 2119 2120 Input Parameters: 2121 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2122 . icntl - index of MUMPS parameter array CNTL() 2123 - val - value of MUMPS CNTL(icntl) 2124 2125 Options Database: 2126 . -mat_mumps_cntl_<icntl> <val> 2127 2128 Level: beginner 2129 2130 References: 2131 . MUMPS Users' Guide 2132 2133 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2134 @*/ 2135 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val) 2136 { 2137 PetscErrorCode ierr; 2138 2139 PetscFunctionBegin; 2140 PetscValidType(F,1); 2141 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2142 PetscValidLogicalCollectiveInt(F,icntl,2); 2143 PetscValidLogicalCollectiveReal(F,val,3); 2144 ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr); 2145 PetscFunctionReturn(0); 2146 } 2147 2148 /*@ 2149 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2150 2151 Logically Collective on Mat 2152 2153 Input Parameters: 2154 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2155 - icntl - index of MUMPS parameter array CNTL() 2156 2157 Output Parameter: 2158 . val - value of MUMPS CNTL(icntl) 2159 2160 Level: beginner 2161 2162 References: 2163 . MUMPS Users' Guide 2164 2165 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2166 @*/ 2167 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val) 2168 { 2169 PetscErrorCode ierr; 2170 2171 PetscFunctionBegin; 2172 PetscValidType(F,1); 2173 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2174 PetscValidLogicalCollectiveInt(F,icntl,2); 2175 PetscValidRealPointer(val,3); 2176 ierr = PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2177 PetscFunctionReturn(0); 2178 } 2179 2180 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info) 2181 { 2182 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2183 2184 PetscFunctionBegin; 2185 *info = mumps->id.INFO(icntl); 2186 PetscFunctionReturn(0); 2187 } 2188 2189 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog) 2190 { 2191 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2192 2193 PetscFunctionBegin; 2194 *infog = mumps->id.INFOG(icntl); 2195 PetscFunctionReturn(0); 2196 } 2197 2198 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo) 2199 { 2200 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2201 2202 PetscFunctionBegin; 2203 *rinfo = mumps->id.RINFO(icntl); 2204 PetscFunctionReturn(0); 2205 } 2206 2207 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog) 2208 { 2209 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2210 2211 PetscFunctionBegin; 2212 *rinfog = mumps->id.RINFOG(icntl); 2213 PetscFunctionReturn(0); 2214 } 2215 2216 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS) 2217 { 2218 PetscErrorCode ierr; 2219 Mat Bt = NULL,Btseq = NULL; 2220 PetscBool flg; 2221 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2222 PetscScalar *aa; 2223 PetscInt spnr,*ia,*ja; 2224 2225 PetscFunctionBegin; 2226 PetscValidIntPointer(spRHS,2); 2227 ierr = PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);CHKERRQ(ierr); 2228 if (flg) { 2229 ierr = MatTransposeGetMat(spRHS,&Bt);CHKERRQ(ierr); 2230 } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be type MATTRANSPOSEMAT matrix"); 2231 2232 ierr = MatMumpsSetIcntl(F,30,1);CHKERRQ(ierr); 2233 2234 if (mumps->petsc_size > 1) { 2235 Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data; 2236 Btseq = b->A; 2237 } else { 2238 Btseq = Bt; 2239 } 2240 2241 if (!mumps->myid) { 2242 ierr = MatSeqAIJGetArray(Btseq,&aa);CHKERRQ(ierr); 2243 ierr = MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2244 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2245 2246 mumps->id.irhs_ptr = ia; 2247 mumps->id.irhs_sparse = ja; 2248 mumps->id.nz_rhs = ia[spnr] - 1; 2249 mumps->id.rhs_sparse = (MumpsScalar*)aa; 2250 } else { 2251 mumps->id.irhs_ptr = NULL; 2252 mumps->id.irhs_sparse = NULL; 2253 mumps->id.nz_rhs = 0; 2254 mumps->id.rhs_sparse = NULL; 2255 } 2256 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2257 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2258 2259 /* solve phase */ 2260 /*-------------*/ 2261 mumps->id.job = JOB_SOLVE; 2262 PetscMUMPS_c(mumps); 2263 if (mumps->id.INFOG(1) < 0) 2264 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 2265 2266 if (!mumps->myid) { 2267 ierr = MatSeqAIJRestoreArray(Btseq,&aa);CHKERRQ(ierr); 2268 ierr = MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2269 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2270 } 2271 PetscFunctionReturn(0); 2272 } 2273 2274 /*@ 2275 MatMumpsGetInverse - Get user-specified set of entries in inverse of A 2276 2277 Logically Collective on Mat 2278 2279 Input Parameters: 2280 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2281 - spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0] 2282 2283 Output Parameter: 2284 . spRHS - requested entries of inverse of A 2285 2286 Level: beginner 2287 2288 References: 2289 . MUMPS Users' Guide 2290 2291 .seealso: MatGetFactor(), MatCreateTranspose() 2292 @*/ 2293 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS) 2294 { 2295 PetscErrorCode ierr; 2296 2297 PetscFunctionBegin; 2298 PetscValidType(F,1); 2299 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2300 ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr); 2301 PetscFunctionReturn(0); 2302 } 2303 2304 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST) 2305 { 2306 PetscErrorCode ierr; 2307 Mat spRHS; 2308 2309 PetscFunctionBegin; 2310 ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr); 2311 ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr); 2312 ierr = MatDestroy(&spRHS);CHKERRQ(ierr); 2313 PetscFunctionReturn(0); 2314 } 2315 2316 /*@ 2317 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T 2318 2319 Logically Collective on Mat 2320 2321 Input Parameters: 2322 + F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface 2323 - spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0] 2324 2325 Output Parameter: 2326 . spRHST - requested entries of inverse of A^T 2327 2328 Level: beginner 2329 2330 References: 2331 . MUMPS Users' Guide 2332 2333 .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse() 2334 @*/ 2335 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST) 2336 { 2337 PetscErrorCode ierr; 2338 PetscBool flg; 2339 2340 PetscFunctionBegin; 2341 PetscValidType(F,1); 2342 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2343 ierr = PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 2344 if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix"); 2345 2346 ierr = PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));CHKERRQ(ierr); 2347 PetscFunctionReturn(0); 2348 } 2349 2350 /*@ 2351 MatMumpsGetInfo - Get MUMPS parameter INFO() 2352 2353 Logically Collective on Mat 2354 2355 Input Parameters: 2356 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2357 - icntl - index of MUMPS parameter array INFO() 2358 2359 Output Parameter: 2360 . ival - value of MUMPS INFO(icntl) 2361 2362 Level: beginner 2363 2364 References: 2365 . MUMPS Users' Guide 2366 2367 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2368 @*/ 2369 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival) 2370 { 2371 PetscErrorCode ierr; 2372 2373 PetscFunctionBegin; 2374 PetscValidType(F,1); 2375 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2376 PetscValidIntPointer(ival,3); 2377 ierr = PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2378 PetscFunctionReturn(0); 2379 } 2380 2381 /*@ 2382 MatMumpsGetInfog - Get MUMPS parameter INFOG() 2383 2384 Logically Collective on Mat 2385 2386 Input Parameters: 2387 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2388 - icntl - index of MUMPS parameter array INFOG() 2389 2390 Output Parameter: 2391 . ival - value of MUMPS INFOG(icntl) 2392 2393 Level: beginner 2394 2395 References: 2396 . MUMPS Users' Guide 2397 2398 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2399 @*/ 2400 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival) 2401 { 2402 PetscErrorCode ierr; 2403 2404 PetscFunctionBegin; 2405 PetscValidType(F,1); 2406 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2407 PetscValidIntPointer(ival,3); 2408 ierr = PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2409 PetscFunctionReturn(0); 2410 } 2411 2412 /*@ 2413 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 2414 2415 Logically Collective on Mat 2416 2417 Input Parameters: 2418 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2419 - icntl - index of MUMPS parameter array RINFO() 2420 2421 Output Parameter: 2422 . val - value of MUMPS RINFO(icntl) 2423 2424 Level: beginner 2425 2426 References: 2427 . MUMPS Users' Guide 2428 2429 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2430 @*/ 2431 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val) 2432 { 2433 PetscErrorCode ierr; 2434 2435 PetscFunctionBegin; 2436 PetscValidType(F,1); 2437 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2438 PetscValidRealPointer(val,3); 2439 ierr = PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2440 PetscFunctionReturn(0); 2441 } 2442 2443 /*@ 2444 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 2445 2446 Logically Collective on Mat 2447 2448 Input Parameters: 2449 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2450 - icntl - index of MUMPS parameter array RINFOG() 2451 2452 Output Parameter: 2453 . val - value of MUMPS RINFOG(icntl) 2454 2455 Level: beginner 2456 2457 References: 2458 . MUMPS Users' Guide 2459 2460 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2461 @*/ 2462 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val) 2463 { 2464 PetscErrorCode ierr; 2465 2466 PetscFunctionBegin; 2467 PetscValidType(F,1); 2468 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2469 PetscValidRealPointer(val,3); 2470 ierr = PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2471 PetscFunctionReturn(0); 2472 } 2473 2474 /*MC 2475 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 2476 distributed and sequential matrices via the external package MUMPS. 2477 2478 Works with MATAIJ and MATSBAIJ matrices 2479 2480 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 2481 2482 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below. 2483 2484 Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver 2485 2486 Options Database Keys: 2487 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 2488 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 2489 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 2490 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 2491 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 2492 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis 2493 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 2494 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 2495 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 2496 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 2497 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 2498 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 2499 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 2500 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 2501 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can allocate per processor 2502 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 2503 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 2504 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix 2505 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering 2506 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 2507 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 2508 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 2509 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 2510 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 2511 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 2512 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 2513 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 2514 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 2515 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 2516 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 2517 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 2518 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 2519 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 2520 Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 2521 2522 Level: beginner 2523 2524 Notes: 2525 MUMPS Cholesky does not handle (complex) Hermitian matrices http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf so using it will error if the matrix is Hermitian. 2526 2527 When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling 2528 $ KSPGetPC(ksp,&pc); 2529 $ PCFactorGetMatrix(pc,&mat); 2530 $ MatMumpsGetInfo(mat,....); 2531 $ MatMumpsGetInfog(mat,....); etc. 2532 Or you can run with -ksp_error_if_not_converged and the program will be stopped and the information printed in the error message. 2533 2534 Two modes to run MUMPS/PETSc with OpenMP 2535 2536 $ Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 2537 $ threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". 2538 2539 $ -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example, 2540 $ if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 2541 2542 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 2543 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc 2544 (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 2545 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 2546 (PETSc will automatically try to utilized a threaded BLAS if --with-openmp is provided). 2547 2548 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 2549 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 2550 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 2551 are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set 2552 by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs. 2553 In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets, 2554 if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind 2555 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half 2556 cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 2557 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 2558 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 2559 examine the mapping result. 2560 2561 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts, 2562 for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc 2563 calls omp_set_num_threads(m) internally before calling MUMPS. 2564 2565 References: 2566 + 1. - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 2567 - 2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017. 2568 2569 .seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCGetFactor(), PCFactorGetMatrix() 2570 2571 M*/ 2572 2573 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type) 2574 { 2575 PetscFunctionBegin; 2576 *type = MATSOLVERMUMPS; 2577 PetscFunctionReturn(0); 2578 } 2579 2580 /* MatGetFactor for Seq and MPI AIJ matrices */ 2581 static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F) 2582 { 2583 Mat B; 2584 PetscErrorCode ierr; 2585 Mat_MUMPS *mumps; 2586 PetscBool isSeqAIJ; 2587 2588 PetscFunctionBegin; 2589 /* Create the factorization matrix */ 2590 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr); 2591 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2592 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2593 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2594 ierr = MatSetUp(B);CHKERRQ(ierr); 2595 2596 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2597 2598 B->ops->view = MatView_MUMPS; 2599 B->ops->getinfo = MatGetInfo_MUMPS; 2600 2601 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2602 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2603 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2604 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2605 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2606 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2607 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2608 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2609 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2610 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2611 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2612 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2613 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2614 2615 if (ftype == MAT_FACTOR_LU) { 2616 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 2617 B->factortype = MAT_FACTOR_LU; 2618 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 2619 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 2620 mumps->sym = 0; 2621 } else { 2622 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 2623 B->factortype = MAT_FACTOR_CHOLESKY; 2624 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 2625 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 2626 #if defined(PETSC_USE_COMPLEX) 2627 mumps->sym = 2; 2628 #else 2629 if (A->spd_set && A->spd) mumps->sym = 1; 2630 else mumps->sym = 2; 2631 #endif 2632 } 2633 2634 /* set solvertype */ 2635 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2636 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2637 2638 B->ops->destroy = MatDestroy_MUMPS; 2639 B->data = (void*)mumps; 2640 2641 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2642 2643 *F = B; 2644 PetscFunctionReturn(0); 2645 } 2646 2647 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 2648 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F) 2649 { 2650 Mat B; 2651 PetscErrorCode ierr; 2652 Mat_MUMPS *mumps; 2653 PetscBool isSeqSBAIJ; 2654 2655 PetscFunctionBegin; 2656 if (ftype != MAT_FACTOR_CHOLESKY) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Cannot use PETSc SBAIJ matrices with MUMPS LU, use AIJ matrix"); 2657 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr); 2658 /* Create the factorization matrix */ 2659 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2660 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2661 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2662 ierr = MatSetUp(B);CHKERRQ(ierr); 2663 2664 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2665 if (isSeqSBAIJ) { 2666 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 2667 } else { 2668 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 2669 } 2670 2671 B->ops->getinfo = MatGetInfo_External; 2672 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 2673 B->ops->view = MatView_MUMPS; 2674 2675 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2676 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2677 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2678 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2679 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2680 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2681 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2682 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2683 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2684 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2685 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2686 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2687 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2688 2689 B->factortype = MAT_FACTOR_CHOLESKY; 2690 #if defined(PETSC_USE_COMPLEX) 2691 mumps->sym = 2; 2692 #else 2693 if (A->spd_set && A->spd) mumps->sym = 1; 2694 else mumps->sym = 2; 2695 #endif 2696 2697 /* set solvertype */ 2698 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2699 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2700 2701 B->ops->destroy = MatDestroy_MUMPS; 2702 B->data = (void*)mumps; 2703 2704 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2705 2706 *F = B; 2707 PetscFunctionReturn(0); 2708 } 2709 2710 static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F) 2711 { 2712 Mat B; 2713 PetscErrorCode ierr; 2714 Mat_MUMPS *mumps; 2715 PetscBool isSeqBAIJ; 2716 2717 PetscFunctionBegin; 2718 /* Create the factorization matrix */ 2719 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr); 2720 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2721 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2722 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2723 ierr = MatSetUp(B);CHKERRQ(ierr); 2724 2725 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2726 if (ftype == MAT_FACTOR_LU) { 2727 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 2728 B->factortype = MAT_FACTOR_LU; 2729 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 2730 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 2731 mumps->sym = 0; 2732 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead\n"); 2733 2734 B->ops->getinfo = MatGetInfo_External; 2735 B->ops->view = MatView_MUMPS; 2736 2737 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2738 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2739 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2740 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2741 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2742 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2743 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2744 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2745 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2746 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2747 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2748 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2749 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2750 2751 /* set solvertype */ 2752 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2753 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2754 2755 B->ops->destroy = MatDestroy_MUMPS; 2756 B->data = (void*)mumps; 2757 2758 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2759 2760 *F = B; 2761 PetscFunctionReturn(0); 2762 } 2763 2764 /* MatGetFactor for Seq and MPI SELL matrices */ 2765 static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F) 2766 { 2767 Mat B; 2768 PetscErrorCode ierr; 2769 Mat_MUMPS *mumps; 2770 PetscBool isSeqSELL; 2771 2772 PetscFunctionBegin; 2773 /* Create the factorization matrix */ 2774 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);CHKERRQ(ierr); 2775 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2776 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2777 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2778 ierr = MatSetUp(B);CHKERRQ(ierr); 2779 2780 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2781 2782 B->ops->view = MatView_MUMPS; 2783 B->ops->getinfo = MatGetInfo_MUMPS; 2784 2785 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2786 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2787 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2788 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2789 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2790 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2791 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2792 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2793 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2794 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2795 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2796 2797 if (ftype == MAT_FACTOR_LU) { 2798 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 2799 B->factortype = MAT_FACTOR_LU; 2800 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 2801 else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 2802 mumps->sym = 0; 2803 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 2804 2805 /* set solvertype */ 2806 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2807 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2808 2809 B->ops->destroy = MatDestroy_MUMPS; 2810 B->data = (void*)mumps; 2811 2812 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2813 2814 *F = B; 2815 PetscFunctionReturn(0); 2816 } 2817 2818 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 2819 { 2820 PetscErrorCode ierr; 2821 2822 PetscFunctionBegin; 2823 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2824 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2825 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2826 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2827 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 2828 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2829 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2830 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2831 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2832 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 2833 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);CHKERRQ(ierr); 2834 PetscFunctionReturn(0); 2835 } 2836 2837