/*
    Provides an interface to the MUMPS sparse solver
*/

#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I  "petscmat.h"  I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sell/mpi/mpisell.h>

EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#include <cmumps_c.h>
#else
#include <zmumps_c.h>
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#include <smumps_c.h>
#else
#include <dmumps_c.h>
#endif
#endif
EXTERN_C_END
#define JOB_INIT -1
#define JOB_FACTSYMBOLIC 1
#define JOB_FACTNUMERIC 2
#define JOB_SOLVE 3
#define JOB_END -2

/* calls to MUMPS */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c cmumps_c
#else
#define MUMPS_c zmumps_c
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c smumps_c
#else
#define MUMPS_c dmumps_c
#endif
#endif

/* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore the CPUs the master ranks can run on */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
#define PetscMUMPS_c(mumps) \
  do { \
    if (mumps->use_petsc_omp_support) { \
      if (mumps->is_omp_master) { \
        ierr = PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl);CHKERRQ(ierr); \
        MUMPS_c(&mumps->id); \
        ierr = PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl);CHKERRQ(ierr); \
      } \
      ierr = PetscOmpCtrlBarrier(mumps->omp_ctrl);CHKERRQ(ierr); \
    } else { \
      MUMPS_c(&mumps->id); \
    } \
  } while(0)
#else
#define PetscMUMPS_c(mumps) \
  do { MUMPS_c(&mumps->id); } while (0)
#endif

/* declare MumpsScalar */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MumpsScalar mumps_complex
#else
#define MumpsScalar mumps_double_complex
#endif
#else
#define MumpsScalar PetscScalar
#endif

/* macros such that indices match MUMPS documentation */
#define ICNTL(I) icntl[(I)-1]
#define CNTL(I) cntl[(I)-1]
#define INFOG(I) infog[(I)-1]
#define INFO(I) info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I) rinfo[(I)-1]
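/*
   For example, with the macros above mumps->id.ICNTL(7) denotes the MUMPS control
   parameter ICNTL(7) in the 1-based numbering of the MUMPS manual, and expands to
   the C array entry mumps->id.icntl[6].
*/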
typedef struct {
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;
#else
  ZMUMPS_STRUC_C id;
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
#else
  DMUMPS_STRUC_C id;
#endif
#endif

  MatStructure matstruc;
  PetscMPIInt  myid,petsc_size;
  PetscInt     *irn,*jcn,nz,sym;
  PetscScalar  *val;
  MPI_Comm     mumps_comm;
  PetscInt     ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter   scat_rhs, scat_sol; /* used by MatSolve() */
  Vec          b_seq,x_seq;
  PetscInt     ninfo,*info;        /* display INFO */
  PetscInt     sizeredrhs;
  PetscScalar  *schur_sol;
  PetscInt     schur_sizesol;

  PetscBool    use_petsc_omp_support;
  PetscOmpCtrl omp_ctrl;            /* an OpenMP controller through which blocked processes release their CPUs (MPI_Barrier does not have this guarantee) */
  MPI_Comm     petsc_comm,omp_comm; /* petsc_comm is the petsc matrix's comm */
  PetscMPIInt  mpinz;               /* on the master rank, nz = sum(mpinz) over omp_comm; on other ranks, mpinz = nz */
  PetscMPIInt  omp_comm_size;
  PetscBool    is_omp_master;       /* is this rank the master of omp_comm? */
  PetscMPIInt  *recvcount,*displs;

  PetscErrorCode (*ConvertToTriples)(Mat, int, MatReuse, int*, int**, int**, PetscScalar**);
} Mat_MUMPS;

extern PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*);

static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscFree2(mumps->id.listvar_schur,mumps->id.schur);CHKERRQ(ierr);
  ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
  ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
  mumps->id.size_schur = 0;
  mumps->id.schur_lld  = 0;
  mumps->id.ICNTL(19)  = 0;
  PetscFunctionReturn(0);
}

/* solve with rhs in mumps->id.redrhs and return in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS            *mumps=(Mat_MUMPS*)F->data;
  Mat                  S,B,X;
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;
  PetscErrorCode       ierr;

  PetscFunctionBegin;
  ierr = MatFactorFactorizeSchurComplement(F);CHKERRQ(ierr);
  ierr = MatFactorGetSchurComplement(F,&S,&schurstatus);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);CHKERRQ(ierr);
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);CHKERRQ(ierr);
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatMatSolveTranspose(S,B,X);CHKERRQ(ierr);
    } else {
      ierr = MatMatSolve(S,B,X);CHKERRQ(ierr);
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    sizesol = mumps->id.nrhs*mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
      ierr = PetscMalloc1(sizesol,&mumps->schur_sol);CHKERRQ(ierr);
      mumps->schur_sizesol = sizesol;
    }
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);CHKERRQ(ierr);
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatTransposeMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr);
    } else {
      ierr = MatMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr);
    }
    ierr = MatCopy(X,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %D",F->schur_status);
    break;
  }
  ierr = MatFactorRestoreSchurComplement(F,&S,schurstatus);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = MatDestroy(&X);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)F->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when the Schur complement has not been computed */
    PetscFunctionReturn(0);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = mumps->id.size_schur;
      ierr = PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);CHKERRQ(ierr);
      mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs;
    }
    mumps->id.ICNTL(26) = 1; /* condensation phase */
  } else { /* prepare for the expansion step */
    /* solve the Schur complement (this has to be done by the MUMPS user, so basically us) */
    ierr = MatMumpsSolveSchur_Private(F);CHKERRQ(ierr);
    mumps->id.ICNTL(26) = 2; /* expansion phase */
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free the MUMPS internal array for redrhs if we have solved for multiple rhs, in order to save memory space */
    if (mumps->id.nrhs > 1) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = 0;
      mumps->sizeredrhs = 0;
    }
  }
  PetscFunctionReturn(0);
}
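/*
   A minimal usage sketch (for illustration only, not code from this file) of how
   the Schur machinery above is driven from user code, assuming a matrix A and an
   index set is selecting the Schur variables:

     Mat           F,S;
     MatFactorInfo info;
     ierr = MatFactorInfoInitialize(&info);CHKERRQ(ierr);
     ierr = MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);CHKERRQ(ierr);
     ierr = MatFactorSetSchurIS(F,is);CHKERRQ(ierr);
     ierr = MatLUFactorSymbolic(F,A,NULL,NULL,&info);CHKERRQ(ierr);
     ierr = MatLUFactorNumeric(F,A,&info);CHKERRQ(ierr);
     ierr = MatFactorCreateSchurComplement(F,&S,NULL);CHKERRQ(ierr);

   A subsequent MatSolve(F,b,x) then condenses the rhs onto the Schur variables
   (ICNTL(26)=1), solves with S, and expands back to the full solution
   (ICNTL(26)=2), as implemented in MatMumpsHandleSchur_Private() above.
*/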
/*
  MatConvertToTriples_A_B - convert a PETSc matrix to triples: row[nz], col[nz], val[nz]

  input:
    A       - matrix in aij, baij or sbaij (bs=1) format
    shift   - 0: C style output triples; 1: Fortran style output triples.
    reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triples
              MAT_REUSE_MATRIX: only the values in the v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and col indices, matrix values (matrix triples)

  The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() they are
  freed with PetscFree(mumps->irn);. This is not ideal code; the fact that v is ONLY sometimes part of mumps->irn means
  that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
*/
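/*
   For example (illustration only), the 2x2 sequential AIJ matrix

      [a  b]
      [0  c]

   with shift=1 (Fortran style, as passed to MUMPS below) is returned as the
   nnz=3 triples r = {1,1,2}, c = {1,2,2}, v = {a,b,c}.
*/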
PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscInt *ai,*aj,*ajj,M=A->rmap->n;
  PetscInt       nz,rnz,i,j;
  PetscErrorCode ierr;
  PetscInt       *row,*col;
  Mat_SeqAIJ     *aa=(Mat_SeqAIJ*)A->data;

  PetscFunctionBegin;
  *v = aa->a;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz;
    ai   = aa->i;
    aj   = aa->j;
    *nnz = nz;
    ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr);
    col  = row + nz;

    nz = 0;
    for (i=0; i<M; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        row[nz] = i+shift; col[nz++] = ajj[j] + shift;
      }
    }
    *r = row; *c = col;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
  PetscInt    *ptr;

  PetscFunctionBegin;
  *v = a->val;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscInt       nz,i,j,row;
    PetscErrorCode ierr;

    nz   = a->sliidx[a->totalslices];
    *nnz = nz;
    ierr = PetscMalloc1(2*nz, &ptr);CHKERRQ(ierr);
    *r   = ptr;
    *c   = ptr + nz;

    for (i=0; i<a->totalslices; i++) {
      for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
        *ptr++ = 8*i + row + shift;
      }
    }
    for (i=0;i<nz;i++) *ptr++ = a->colidx[i] + shift;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  Mat_SeqBAIJ    *aa=(Mat_SeqBAIJ*)A->data;
  const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2;
  PetscInt       bs,M,nz,idx=0,rnz,i,j,k,m;
  PetscErrorCode ierr;
  PetscInt       *row,*col;

  PetscFunctionBegin;
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  M    = A->rmap->N/bs;
  *v   = aa->a;
  if (reuse == MAT_INITIAL_MATRIX) {
    ai   = aa->i; aj = aa->j;
    nz   = bs2*aa->nz;
    *nnz = nz;
    ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr);
    col  = row + nz;

    for (i=0; i<M; i++) {
      ajj = aj + ai[i];
      rnz = ai[i+1] - ai[i];
      for (k=0; k<rnz; k++) {
        for (j=0; j<bs; j++) {
          for (m=0; m<bs; m++) {
            row[idx]   = i*bs + m + shift;
            col[idx++] = bs*(ajj[k]) + j + shift;
          }
        }
      }
    }
    *r = row; *c = col;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscInt *ai, *aj,*ajj,M=A->rmap->n;
  PetscInt       nz,rnz,i,j;
  PetscErrorCode ierr;
  PetscInt       *row,*col;
  Mat_SeqSBAIJ   *aa=(Mat_SeqSBAIJ*)A->data;
#if defined(PETSC_USE_COMPLEX)
  PetscBool      hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
  if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  *v = aa->a;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz;
    ai   = aa->i;
    aj   = aa->j;
    *v   = aa->a;
    *nnz = nz;
    ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr);
    col  = row + nz;

    nz = 0;
    for (i=0; i<M; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        row[nz] = i+shift; col[nz++] = ajj[j] + shift;
      }
    }
    *r = row; *c = col;
  }
  PetscFunctionReturn(0);
}
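/*
   To illustrate the block expansion in MatConvertToTriples_seqbaij_seqaij() above
   (illustration only): with bs=2, a single block at block position (i,k) yields
   bs2=4 triples, one per scalar entry, with rows i*bs+m and columns bs*k+j for
   m,j in {0,1}, emitted in the column-major order in which BAIJ stores each block.
*/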
PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscInt    *ai,*aj,*ajj,*adiag,M=A->rmap->n;
  PetscInt          nz,rnz,i,j;
  const PetscScalar *av,*v1;
  PetscScalar       *val;
  PetscErrorCode    ierr;
  PetscInt          *row,*col;
  Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
  PetscBool         missing;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
  if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  ai    = aa->i; aj = aa->j; av = aa->a;
  adiag = aa->diag;
  ierr  = MatMissingDiagonal_SeqAIJ(A,&missing,&i);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    /* count nz in the upper triangular part of A */
    nz = 0;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            nz++;
          }
        } else {
          nz += ai[i+1] - adiag[i];
        }
      }
    } else {
      for (i=0; i<M; i++) nz += ai[i+1] - adiag[i];
    }
    *nnz = nz;

    ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr);
    col  = row + nz;
    val  = (PetscScalar*)(col + nz);

    nz = 0;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            row[nz] = i+shift;
            col[nz] = aj[j]+shift;
            val[nz] = av[j];
            nz++;
          }
        } else {
          rnz = ai[i+1] - adiag[i];
          ajj = aj + adiag[i];
          v1  = av + adiag[i];
          for (j=0; j<rnz; j++) {
            row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i=0; i<M; i++) {
        rnz = ai[i+1] - adiag[i];
        ajj = aj + adiag[i];
        v1  = av + adiag[i];
        for (j=0; j<rnz; j++) {
          row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j];
        }
      }
    }
    *r = row; *c = col; *v = val;
  } else {
    nz = 0; val = *v;
    if (missing) {
      for (i=0; i <M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            val[nz++] = av[j];
          }
        } else {
          rnz = ai[i+1] - adiag[i];
          v1  = av + adiag[i];
          for (j=0; j<rnz; j++) {
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i=0; i <M; i++) {
        rnz = ai[i+1] - adiag[i];
        v1  = av + adiag[i];
        for (j=0; j<rnz; j++) {
          val[nz++] = v1[j];
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt          rstart,nz,i,j,jj,irow,countA,countB;
  PetscInt          *row,*col;
  const PetscScalar *av, *bv,*v1,*v2;
  PetscScalar       *val;
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ*)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
  if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  rstart = A->rmap->rstart;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;
  av = aa->a;
  bv = bb->a;

  garray = mat->garray;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz + bb->nz;
    *nnz = nz;
    ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr);
    col  = row + nz;
    val  = (PetscScalar*)(col + nz);

    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j=0; j < countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt          rstart,nz,i,j,jj,irow,countA,countB;
  PetscInt          *row,*col;
  const PetscScalar *av, *bv,*v1,*v2;
  PetscScalar       *val;
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ        *aa  = (Mat_SeqAIJ*)(mat->A)->data;
  Mat_SeqAIJ        *bb  = (Mat_SeqAIJ*)(mat->B)->data;

  PetscFunctionBegin;
  rstart = A->rmap->rstart;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;
  av = aa->a;
  bv = bb->a;

  garray = mat->garray;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz + bb->nz;
    *nnz = nz;
    ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr);
    col  = row + nz;
    val  = (PetscScalar*)(col + nz);

    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j=0; j < countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  PetscFunctionReturn(0);
}
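/*
   The A-part/B-part split above reflects the MPIAIJ layout: mat->A holds the
   "diagonal block" whose local column indices are offset by rstart, while mat->B
   holds the off-process columns, whose compacted local indices are translated to
   global ones through mat->garray. For example (illustration only), on a rank
   owning rows 4..7 of an 8x8 matrix, a B-part entry with local column 0 and
   garray[0]=2 produces a triple with global column 2(+shift).
*/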
PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ*)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
  const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj;
  const PetscInt    *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart;
  const PetscInt    bs2=mat->bs2;
  PetscErrorCode    ierr;
  PetscInt          bs,nz,i,j,k,n,jj,irow,countA,countB,idx;
  PetscInt          *row,*col;
  const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2;
  PetscScalar       *val;

  PetscFunctionBegin;
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = bs2*(aa->nz + bb->nz);
    *nnz = nz;
    ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr);
    col  = row + nz;
    val  = (PetscScalar*)(col + nz);

    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }

  jj = 0; irow = rstart;
  for (i=0; i<mbs; i++) {
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    ajj    = aj + ai[i];
    bjj    = bj + bi[i];
    v1     = av + bs2*ai[i];
    v2     = bv + bs2*bi[i];

    idx = 0;
    /* A-part */
    for (k=0; k<countA; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            row[jj] = irow + n + shift;
            col[jj] = rstart + bs*ajj[k] + j + shift;
          }
          val[jj++] = v1[idx++];
        }
      }
    }

    idx = 0;
    /* B-part */
    for (k=0; k<countB; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            row[jj] = irow + n + shift;
            col[jj] = bs*garray[bjj[k]] + j + shift;
          }
          val[jj++] = v2[idx++];
        }
      }
    }
    irow += bs;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscInt    *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt          rstart,nz,nza,nzb,i,j,jj,irow,countA,countB;
  PetscInt          *row,*col;
  const PetscScalar *av, *bv,*v1,*v2;
  PetscScalar       *val;
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ        *aa  = (Mat_SeqAIJ*)(mat->A)->data;
  Mat_SeqAIJ        *bb  = (Mat_SeqAIJ*)(mat->B)->data;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
  if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  ai     = aa->i;
  aj     = aa->j;
  adiag  = aa->diag;
  bi     = bb->i;
  bj     = bb->j;
  garray = mat->garray;
  av     = aa->a;
  bv     = bb->a;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
    nzb = 0; /* num of upper triangular entries in mat->B */
    for (i=0; i<m; i++) {
      nza   += (ai[i+1] - adiag[i]);
      countB = bi[i+1] - bi[i];
      bjj    = bj + bi[i];
      for (j=0; j<countB; j++) {
        if (garray[bjj[j]] > rstart) nzb++;
      }
    }

    nz   = nza + nzb; /* total nz of the upper triangular part of mat */
    *nnz = nz;
    ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr);
    col  = row + nz;
    val  = (PetscScalar*)(col + nz);

    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
    v1     = av + adiag[i];
    countA = ai[i+1] - adiag[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j=0; j < countB; j++) {
      if (garray[bjj[j]] > rstart) {
        if (reuse == MAT_INITIAL_MATRIX) {
          row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
        }
        val[jj++] = v2[j];
      }
    }
    irow++;
  }
  PetscFunctionReturn(0);
}
PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
  ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr);
  ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr);
  ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr);
  ierr = PetscFree(mumps->irn);CHKERRQ(ierr);
  ierr = PetscFree(mumps->info);CHKERRQ(ierr);
  ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr);
  mumps->id.job = JOB_END;
  PetscMUMPS_c(mumps);
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  if (mumps->use_petsc_omp_support) { ierr = PetscOmpCtrlDestroy(&mumps->omp_ctrl);CHKERRQ(ierr); }
#endif
  ierr = PetscFree2(mumps->recvcount,mumps->displs);CHKERRQ(ierr);
  ierr = PetscFree(A->data);CHKERRQ(ierr);

  /* clear composed functions */
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS        *mumps=(Mat_MUMPS*)A->data;
  PetscScalar      *array;
  Vec              b_seq;
  IS               is_iden,is_petsc;
  PetscErrorCode   ierr;
  PetscInt         i;
  PetscBool        second_solve = PETSC_FALSE;
  static PetscBool cite1 = PETSC_FALSE,cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1);CHKERRQ(ierr);
  ierr = PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",&cite2);CHKERRQ(ierr);

  if (A->factorerrortype) {
    ierr = PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    ierr = VecSetInf(x);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  mumps->id.ICNTL(20) = 0; /* dense RHS */
  mumps->id.nrhs      = 1;
  b_seq = mumps->b_seq;
  if (mumps->petsc_size > 1) {
    /* MUMPS only supports a centralized rhs. Scatter b into a sequential rhs vector */
    ierr = VecScatterBegin(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mumps->myid) {ierr = VecGetArray(b_seq,&array);CHKERRQ(ierr);}
  } else { /* petsc_size == 1 */
    ierr = VecCopy(b,x);CHKERRQ(ierr);
    ierr = VecGetArray(x,&array);CHKERRQ(ierr);
  }
  if (!mumps->myid) { /* define the rhs on the host */
    mumps->id.nrhs = 1;
    mumps->id.rhs  = (MumpsScalar*)array;
  }

  /*
     handle the condensation step of the Schur complement (if any)
     We set ICNTL(26) == -1 by default when Schur indices have been provided by the user.
     According to the MUMPS (5.0.0) manual, any other value can be harmful during the factorization phase.
     Unless the user provides a valid value for ICNTL(26), the MatSolve and MatMatSolve routines solve the full system.
     This requires an extra call to PetscMUMPS_c and the computation of the factors for S.
  */
  if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
    if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n");
    second_solve = PETSC_TRUE;
    ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
  }
  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

  /* handle the expansion step of the Schur complement (if any) */
  if (second_solve) {
    ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
  }

  if (mumps->petsc_size > 1) { /* convert the MUMPS distributed solution to the petsc mpi x */
    if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
      /* when id.ICNTL(9) changes, the contents of sol_loc may change (not its size, lsol_loc), so recreate scat_sol */
      ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
    }
    if (!mumps->scat_sol) { /* create the scatter scat_sol */
      ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */
      for (i=0; i<mumps->id.lsol_loc; i++) {
        mumps->id.isol_loc[i] -= 1; /* change Fortran style to C style */
      }
      ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,mumps->id.isol_loc,PETSC_COPY_VALUES,&is_petsc);CHKERRQ(ierr); /* to */
      ierr = VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr);
      ierr = ISDestroy(&is_iden);CHKERRQ(ierr);
      ierr = ISDestroy(&is_petsc);CHKERRQ(ierr);

      mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save the current value of id.ICNTL(9) */
    }

    ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }

  if (mumps->petsc_size > 1) {if (!mumps->myid) {ierr = VecRestoreArray(b_seq,&array);CHKERRQ(ierr);}}
  else {ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);}

  ierr = PetscLogFlops(2.0*mumps->id.RINFO(3));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
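/*
   For context (a sketch, not code from this file): end users normally reach
   MatSolve_MUMPS() through KSP with a direct-solver preconditioner, e.g.

     ./app -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps

   or programmatically via MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F),
   the symbolic/numeric factorization routines in this file, and then
   MatSolve(F,b,x).
*/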
PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mumps->id.ICNTL(9) = 0; /* solve with A^T */
  ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr);
  mumps->id.ICNTL(9) = 1; /* restore the default: solve with A */
  PetscFunctionReturn(0);
}
PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X)
{
  PetscErrorCode    ierr;
  Mat               Bt = NULL;
  PetscBool         flg, flgT;
  Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
  PetscInt          i,nrhs,M;
  PetscScalar       *array;
  const PetscScalar *rbray;
  PetscInt          lsol_loc,nlsol_loc,*isol_loc,*idxx,*isol_loc_save,iidx = 0;
  PetscScalar       *bray,*sol_loc,*sol_loc_save;
  IS                is_to,is_from;
  PetscInt          k,proc,j,m,myrstart;
  const PetscInt    *rstart;
  Vec               v_mpi,b_seq,msol_loc;
  VecScatter        scat_rhs,scat_sol;
  PetscScalar       *aa;
  PetscInt          spnr,*ia,*ja;
  Mat_MPIAIJ        *b = NULL;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be a MATDENSE matrix");

  ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (flg) { /* dense B */
    if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have the same row distribution");
    mumps->id.ICNTL(20) = 0; /* dense RHS */
  } else { /* sparse B */
    ierr = PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);CHKERRQ(ierr);
    if (flgT) { /* input B is the transpose of the actual RHS matrix,
                   because MUMPS requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
      ierr = MatTransposeGetMat(B,&Bt);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be a MATTRANSPOSEMAT matrix");
    mumps->id.ICNTL(20) = 1; /* sparse RHS */
  }

  ierr = MatGetSize(B,&M,&nrhs);CHKERRQ(ierr);
  mumps->id.nrhs = nrhs;
  mumps->id.lrhs = M;
  mumps->id.rhs  = NULL;

  if (mumps->petsc_size == 1) {
    PetscScalar *aa;
    PetscInt    spnr,*ia,*ja;
    PetscBool   second_solve = PETSC_FALSE;

    ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
    mumps->id.rhs = (MumpsScalar*)array;

    if (!Bt) { /* dense B */
      /* copy B to X */
      ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr);
      ierr = PetscArraycpy(array,rbray,M*nrhs);CHKERRQ(ierr);
      ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr);
    } else { /* sparse B */
      ierr = MatSeqAIJGetArray(Bt,&aa);CHKERRQ(ierr);
      ierr = MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      /* MUMPS requires ia and ja to start at 1! */
      mumps->id.irhs_ptr    = ia;
      mumps->id.irhs_sparse = ja;
      mumps->id.nz_rhs      = ia[spnr] - 1;
      mumps->id.rhs_sparse  = (MumpsScalar*)aa;
    }
    /* handle the condensation step of the Schur complement (if any) */
    if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
      second_solve = PETSC_TRUE;
      ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
    }
    /* solve phase */
    /*-------------*/
    mumps->id.job = JOB_SOLVE;
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

    /* handle the expansion step of the Schur complement (if any) */
    if (second_solve) {
      ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
    }
    if (Bt) { /* sparse B */
      ierr = MatSeqAIJRestoreArray(Bt,&aa);CHKERRQ(ierr);
      ierr = MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
    ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /*--------- parallel case: MUMPS requires the rhs B to be centralized on the host! --------*/
  if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n");

  /* create msol_loc to hold the mumps local solution */
  isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
  sol_loc_save  = (PetscScalar*)mumps->id.sol_loc;

  lsol_loc  = mumps->id.lsol_loc;
  nlsol_loc = nrhs*lsol_loc; /* length of sol_loc */
  ierr = PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);CHKERRQ(ierr);
  mumps->id.sol_loc  = (MumpsScalar*)sol_loc;
  mumps->id.isol_loc = isol_loc;

  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);CHKERRQ(ierr);

  if (!Bt) { /* dense B */
    /* TODO: Because of the non-contiguous indices, the created vecscatter scat_rhs cannot be done as an MPI_Gather, resulting in
       very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank
       0, re-arrange B into the desired order, which is a local operation.
    */

    /* scatter v_mpi to b_seq because MUMPS only supports a centralized rhs */
    /* wrap the dense rhs matrix B into a vector v_mpi */
    ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr);
    ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr);
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr);

    /* scatter v_mpi to b_seq in proc[0]. MUMPS requires the rhs to be centralized on the host! */
    if (!mumps->myid) {
      PetscInt *idx;
      /* idx: maps from the k-th index of v_mpi to the (i,j)-th global entry of B */
      ierr = PetscMalloc1(nrhs*M,&idx);CHKERRQ(ierr);
      ierr = MatGetOwnershipRanges(B,&rstart);CHKERRQ(ierr);
      k = 0;
      for (proc=0; proc<mumps->petsc_size; proc++){
        for (j=0; j<nrhs; j++){
          for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i;
        }
      }

      ierr = VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);CHKERRQ(ierr);
      ierr = ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);CHKERRQ(ierr);
    } else {
      ierr = VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);CHKERRQ(ierr);
    }
    ierr = VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);CHKERRQ(ierr);
    ierr = VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = ISDestroy(&is_to);CHKERRQ(ierr);
    ierr = ISDestroy(&is_from);CHKERRQ(ierr);
    ierr = VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

    if (!mumps->myid) { /* define the rhs on the host */
      ierr = VecGetArray(b_seq,&bray);CHKERRQ(ierr);
      mumps->id.rhs = (MumpsScalar*)bray;
      ierr = VecRestoreArray(b_seq,&bray);CHKERRQ(ierr);
    }

  } else { /* sparse B */
    b = (Mat_MPIAIJ*)Bt->data;

    /* wrap the dense X into a vector v_mpi */
    ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr);
    ierr = MatDenseGetArray(X,&bray);CHKERRQ(ierr);
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(X,&bray);CHKERRQ(ierr);

    if (!mumps->myid) {
      ierr = MatSeqAIJGetArray(b->A,&aa);CHKERRQ(ierr);
      ierr = MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      /* MUMPS requires ia and ja to start at 1! */
      mumps->id.irhs_ptr    = ia;
      mumps->id.irhs_sparse = ja;
      mumps->id.nz_rhs      = ia[spnr] - 1;
      mumps->id.rhs_sparse  = (MumpsScalar*)aa;
    } else {
      mumps->id.irhs_ptr    = NULL;
      mumps->id.irhs_sparse = NULL;
      mumps->id.nz_rhs      = 0;
      mumps->id.rhs_sparse  = NULL;
    }
  }

  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

  /* scatter the MUMPS distributed solution to the petsc vector v_mpi, which shares local arrays with the solution matrix X */
  ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
  ierr = VecPlaceArray(v_mpi,array);CHKERRQ(ierr);

  /* create the scatter scat_sol */
  ierr = MatGetOwnershipRanges(X,&rstart);CHKERRQ(ierr);
  /* iidx: index for scattering the mumps solution to petsc X */

  ierr = ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);CHKERRQ(ierr);
  ierr = PetscMalloc1(nlsol_loc,&idxx);CHKERRQ(ierr);
  for (i=0; i<lsol_loc; i++) {
    isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in the j-th vector */

    for (proc=0; proc<mumps->petsc_size; proc++){
      if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) {
        myrstart = rstart[proc];
        k        = isol_loc[i] - myrstart;        /* local index on the 1st column of the petsc vector X */
        iidx     = k + myrstart*nrhs;             /* maps mumps isol_loc[i] to the petsc index in X */
        m        = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */
        break;
      }
    }

    for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m;
  }
  ierr = ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr);
  ierr = VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);CHKERRQ(ierr);
  ierr = VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = ISDestroy(&is_from);CHKERRQ(ierr);
  ierr = ISDestroy(&is_to);CHKERRQ(ierr);
  ierr = VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);

  /* free spaces */
  mumps->id.sol_loc  = (MumpsScalar*)sol_loc_save;
  mumps->id.isol_loc = isol_loc_save;

  ierr = PetscFree2(sol_loc,isol_loc);CHKERRQ(ierr);
  ierr = PetscFree(idxx);CHKERRQ(ierr);
  ierr = VecDestroy(&msol_loc);CHKERRQ(ierr);
  ierr = VecDestroy(&v_mpi);CHKERRQ(ierr);
  if (Bt) {
    if (!mumps->myid) {
      b    = (Mat_MPIAIJ*)Bt->data;
      ierr = MatSeqAIJRestoreArray(b->A,&aa);CHKERRQ(ierr);
      ierr = MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
  } else {
    ierr = VecDestroy(&b_seq);CHKERRQ(ierr);
    ierr = VecScatterDestroy(&scat_rhs);CHKERRQ(ierr);
  }
  ierr = VecScatterDestroy(&scat_sol);CHKERRQ(ierr);
  ierr = PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
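/*
   A usage sketch (illustration only, assuming a factored matrix F and a dense
   matrix B of right-hand sides with the same row distribution as A):

     Mat X;
     ierr = MatDuplicate(B,MAT_DO_NOT_COPY_VALUES,&X);CHKERRQ(ierr);
     ierr = MatMatSolve(F,B,X);CHKERRQ(ierr);

   Each column of B is one right-hand side; MUMPS solves them all in one call
   (mumps->id.nrhs = nrhs above).
*/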
PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X)
{
  PetscErrorCode ierr;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be a MATAIJ matrix");

  /* Create B=Bt^T that uses Bt's data structure */
  ierr = MatCreateTranspose(Bt,&B);CHKERRQ(ierr);

  ierr = MatMatSolve_MUMPS(A,B,X);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if !defined(PETSC_USE_COMPLEX)
/*
  input:
   F:     numeric factor
  output:
   nneg:  total number of negative pivots
   nzero: total number of zero pivots
   npos:  (global dimension of F) - nneg - nzero
*/
PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos)
{
  Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRQ(ierr);
  /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
  if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",mumps->id.ICNTL(13));

  if (nneg) *nneg = mumps->id.INFOG(12);
  if (nzero || npos) {
    if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
    if (nzero) *nzero = mumps->id.INFOG(28);
    if (npos)  *npos  = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
  }
  PetscFunctionReturn(0);
}
#endif
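/*
   A usage sketch for the inertia computation (illustration only): factor a
   symmetric matrix with Cholesky and the options

     -mat_mumps_icntl_13 1 -mat_mumps_icntl_24 1

   then call MatGetInertia(F,&nneg,&nzero,&npos). This is what, e.g.,
   spectrum-slicing eigensolvers rely on to count eigenvalues below a shift.
*/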
PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps)
{
  PetscErrorCode ierr;
  PetscInt       i,nz=0,*irn,*jcn=0;
  PetscScalar    *val=0;
  PetscMPIInt    mpinz,*recvcount=NULL,*displs=NULL;

  PetscFunctionBegin;
  if (mumps->omp_comm_size > 1) {
    if (reuse == MAT_INITIAL_MATRIX) {
      /* master first gathers counts of nonzeros to receive */
      if (mumps->is_omp_master) { ierr = PetscMalloc2(mumps->omp_comm_size,&recvcount,mumps->omp_comm_size,&displs);CHKERRQ(ierr); }
      ierr = PetscMPIIntCast(mumps->nz,&mpinz);CHKERRQ(ierr);
      ierr = MPI_Gather(&mpinz,1,MPI_INT,recvcount,1,MPI_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr);

      /* master allocates memory to receive nonzeros */
      if (mumps->is_omp_master) {
        displs[0] = 0;
        for (i=1; i<mumps->omp_comm_size; i++) displs[i] = displs[i-1] + recvcount[i-1];
        nz   = displs[mumps->omp_comm_size-1] + recvcount[mumps->omp_comm_size-1];
        ierr = PetscMalloc(2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar),&irn);CHKERRQ(ierr);
        jcn  = irn + nz;
        val  = (PetscScalar*)(jcn + nz);
      }

      /* save the gatherv plan */
      mumps->mpinz     = mpinz; /* used as the send count */
      mumps->recvcount = recvcount;
      mumps->displs    = displs;

      /* master gathers the nonzeros */
      ierr = MPI_Gatherv(mumps->irn,mpinz,MPIU_INT,irn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr);
      ierr = MPI_Gatherv(mumps->jcn,mpinz,MPIU_INT,jcn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr);
      ierr = MPI_Gatherv(mumps->val,mpinz,MPIU_SCALAR,val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr);

      /* master frees its row/col/val and replaces them with the bigger arrays */
      if (mumps->is_omp_master) {
        ierr = PetscFree(mumps->irn);CHKERRQ(ierr); /* irn/jcn/val are allocated together so free only irn */
        mumps->nz  = nz; /* it is the sum of mpinz over omp_comm */
        mumps->irn = irn;
        mumps->jcn = jcn;
        mumps->val = val;
      }
    } else {
      ierr = MPI_Gatherv((mumps->is_omp_master?MPI_IN_PLACE:mumps->val),mumps->mpinz,MPIU_SCALAR,mumps->val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}
PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat_MUMPS      *mumps =(Mat_MUMPS*)(F)->data;
  PetscErrorCode ierr;
  PetscBool      isMPIAIJ;

  PetscFunctionBegin;
  if (mumps->id.INFOG(1) < 0) {
    if (mumps->id.INFOG(1) == -6) {
      ierr = PetscInfo2(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    }
    ierr = PetscInfo2(A,"MatFactorNumeric is called after the analysis phase failed, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr);
  ierr = MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);CHKERRQ(ierr);

  /* numerical factorization phase */
  /*-------------------------------*/
  mumps->id.job = JOB_FACTNUMERIC;
  if (!mumps->id.ICNTL(18)) { /* A is centralized */
    if (!mumps->myid) {
      mumps->id.a = (MumpsScalar*)mumps->val;
    }
  } else {
    mumps->id.a_loc = (MumpsScalar*)mumps->val;
  }
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) {
    if (A->erroriffailure) {
      SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
    } else {
      if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
        ierr = PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
      } else if (mumps->id.INFOG(1) == -13) {
        ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_OUTMEMORY;
      } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
        ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_OUTMEMORY;
      } else {
        ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_OTHER;
      }
    }
  }
  if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d\n",mumps->id.ICNTL(16));

  F->assembled    = PETSC_TRUE;
  mumps->matstruc = SAME_NONZERO_PATTERN;
  if (F->schur) { /* reset the Schur status to unfactored */
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
      mumps->id.ICNTL(19) = 2;
      ierr = MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);CHKERRQ(ierr);
    }
    ierr = MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);CHKERRQ(ierr);
  }

  /* just to be sure that the ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
  if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;

  if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
  if (mumps->petsc_size > 1) {
    PetscInt    lsol_loc;
    PetscScalar *sol_loc;

    ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr);

    /* distributed solution; create x_seq=sol_loc for repeated use */
    if (mumps->x_seq) {
      ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
      ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr);
      ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr);
    }
    lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
    ierr = PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);CHKERRQ(ierr);
    mumps->id.lsol_loc = lsol_loc;
    mumps->id.sol_loc  = (MumpsScalar*)sol_loc;
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr);
  }
  ierr = PetscLogFlops(mumps->id.RINFO(2));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
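/*
   A sketch of how callers can react to the soft failures recorded above when
   A->erroriffailure is false (illustration only):

     ierr = MatLUFactorNumeric(F,A,&info);CHKERRQ(ierr);
     ierr = MatFactorGetError(F,&err);CHKERRQ(ierr);
     if (err == MAT_FACTOR_NUMERIC_ZEROPIVOT) { shift the matrix and refactor }

   where err is a MatFactorError; KSP/PC do essentially this to flag a
   preconditioner failure instead of aborting.
*/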
/* Sets MUMPS options from the options database */
PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A)
{
  Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
  PetscErrorCode ierr;
  PetscInt       icntl,info[80],i,ninfo=80;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr);
  if (flg) mumps->id.ICNTL(1) = icntl;
  ierr = PetscOptionsInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warnings","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr);
  if (flg) mumps->id.ICNTL(2) = icntl;
  ierr = PetscOptionsInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr);
  if (flg) mumps->id.ICNTL(3) = icntl;

  ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr);
  if (flg) mumps->id.ICNTL(4) = icntl;
  if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* restore the MUMPS default id.ICNTL(3) = 6 */

  ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr);
  if (flg) mumps->id.ICNTL(6) = icntl;

  ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr);
  if (flg) {
    if (icntl == 1 && mumps->petsc_size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"pivot order cannot be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n");
    else mumps->id.ICNTL(7) = icntl;
  }

  ierr = PetscOptionsInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr);
  /* ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL);CHKERRQ(ierr);  handled by MatSolveTranspose_MUMPS() */
  ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr);
  if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any Schur data (if any) */
    ierr = MatDestroy(&F->schur);CHKERRQ(ierr);
    ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr);
  }
  /* ierr = PetscOptionsInt("-mat_mumps_icntl_20","ICNTL(20): the format (dense or sparse) of the right-hand sides","None",mumps->id.ICNTL(20),&mumps->id.ICNTL(20),NULL);CHKERRQ(ierr);  -- sparse rhs is not supported in the PETSc API */
  /* ierr = PetscOptionsInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL);CHKERRQ(ierr);  we only use distributed solution vectors */

  ierr = PetscOptionsInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr);
  if (mumps->id.ICNTL(24)) {
    mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */
  }

  ierr = PetscOptionsInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix is present","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_27","ICNTL(27): the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr);
  /* ierr = PetscOptionsInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */
  ierr = PetscOptionsInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr);
  /* ierr = PetscOptionsInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr);  -- not supported by the PETSc API */
  ierr = PetscOptionsInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);CHKERRQ(ierr);
matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr); 1389 ierr = PetscOptionsInt("-mat_mumps_icntl_27","ICNTL(27): the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr); 1390 ierr = PetscOptionsInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr); 1391 ierr = PetscOptionsInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr); 1392 /* ierr = PetscOptionsInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */ 1393 ierr = PetscOptionsInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr); 1394 /* ierr = PetscOptionsInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr); -- not supported by PETSc API */ 1395 ierr = PetscOptionsInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr); 1396 ierr = PetscOptionsInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr); 1397 ierr = PetscOptionsInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);CHKERRQ(ierr); 1398 ierr = PetscOptionsInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);CHKERRQ(ierr); 1399 1400 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr); 1401 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr); 1402 ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr); 1403 ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr); 1404 ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr); 1405 ierr = PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);CHKERRQ(ierr); 1406 1407 ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, 256, NULL);CHKERRQ(ierr); 1408 1409 ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr); 1410 if (ninfo) { 1411 if (ninfo > 80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %d must <= 80\n",ninfo); 1412 ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr); 1413 mumps->ninfo = ninfo; 1414 for (i=0; i<ninfo; i++) { 1415 if (info[i] < 0 || info[i]>80) 
SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %d must between 1 and 80\n",ninfo); 1416 else mumps->info[i] = info[i]; 1417 } 1418 } 1419 1420 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1421 PetscFunctionReturn(0); 1422 } 1423 1424 PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps) 1425 { 1426 PetscErrorCode ierr; 1427 PetscInt nthreads=0; 1428 1429 PetscFunctionBegin; 1430 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1431 ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRQ(ierr); 1432 ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRQ(ierr); /* so that code like "if (!myid)" still works even if mumps_comm is different */ 1433 1434 ierr = PetscOptionsHasName(NULL,NULL,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr); 1435 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1436 ierr = PetscOptionsGetInt(NULL,NULL,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr); 1437 if (mumps->use_petsc_omp_support) { 1438 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1439 ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr); 1440 ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr); 1441 #else 1442 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -mat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual\n"); 1443 #endif 1444 } else { 1445 mumps->omp_comm = PETSC_COMM_SELF; 1446 mumps->mumps_comm = mumps->petsc_comm; 1447 mumps->is_omp_master = PETSC_TRUE; 1448 } 1449 ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRQ(ierr); 1450 1451 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 1452 mumps->id.job = JOB_INIT; 1453 mumps->id.par = 1; /* host participates factorizaton and solve */ 1454 mumps->id.sym = mumps->sym; 1455 1456 PetscMUMPS_c(mumps); 1457 1458 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 1459 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 
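
/*
   Example of driving the options above from the command line (a sketch; the executable
   name and the option values are illustrative only):

     mpiexec -n 4 ./myapp -pc_type lu -pc_factor_mat_solver_type mumps \
       -mat_mumps_icntl_7 5 -mat_mumps_icntl_14 30 -mat_mumps_cntl_1 0.01

   This selects the Metis ordering (ICNTL(7)=5), increases the estimated working space by
   30 percent (ICNTL(14)=30), and sets the relative pivoting threshold (CNTL(1)=0.01).
*/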

PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps)
{
  PetscErrorCode ierr;
  PetscInt       nthreads=0;

  PetscFunctionBegin;
  mumps->petsc_comm = PetscObjectComm((PetscObject)A);
  ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRQ(ierr); /* so that code like "if (!myid)" still works even if mumps_comm is different */

  ierr = PetscOptionsHasName(NULL,NULL,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr);
  if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when the user did not supply one */
  ierr = PetscOptionsGetInt(NULL,NULL,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr);
  if (mumps->use_petsc_omp_support) {
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
    ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr);
    ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr);
#else
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -mat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual\n");
#endif
  } else {
    mumps->omp_comm      = PETSC_COMM_SELF;
    mumps->mumps_comm    = mumps->petsc_comm;
    mumps->is_omp_master = PETSC_TRUE;
  }
  ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRQ(ierr);

  mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
  mumps->id.job = JOB_INIT;
  mumps->id.par = 1;  /* host participates in factorization and solve */
  mumps->id.sym = mumps->sym;

  PetscMUMPS_c(mumps);

  /* copy MUMPS default control values from the master to the slaves. Although slaves do not call MUMPS, they may access these values in code.
     For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
   */
  ierr = MPI_Bcast(mumps->id.icntl,60,MPIU_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 Manual Section 9 */
  ierr = MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr);

  mumps->scat_rhs = NULL;
  mumps->scat_sol = NULL;

  /* set PETSc-MUMPS default options - override MUMPS defaults */
  mumps->id.ICNTL(3) = 0;
  mumps->id.ICNTL(4) = 0;
  if (mumps->petsc_size == 1) {
    mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
  } else {
    mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
    mumps->id.ICNTL(20) = 0; /* rhs is in dense format */
    mumps->id.ICNTL(21) = 1; /* distributed solution */
  }

  /* schur */
  mumps->id.size_schur    = 0;
  mumps->id.listvar_schur = NULL;
  mumps->id.schur         = NULL;
  mumps->sizeredrhs       = 0;
  mumps->schur_sol        = NULL;
  mumps->schur_sizesol    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Bcast(mumps->id.infog, 80,MPIU_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 manual p82 */
  ierr = MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr);
  if (mumps->id.INFOG(1) < 0) {
    if (A->erroriffailure) {
      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
    } else {
      if (mumps->id.INFOG(1) == -6) {
        ierr = PetscInfo2(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
      } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
        ierr = PetscInfo2(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_OUTMEMORY;
      } else {
        ierr = PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_OTHER;
      }
    }
  }
  PetscFunctionReturn(0);
}

/* Note the PETSc r(=c) permutation is used when mumps->id.ICNTL(7)==1 with centralized assembled matrix input; otherwise r and c are ignored */
PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
  PetscErrorCode ierr;
  Vec            b;
  const PetscInt M = A->rmap->N;

  PetscFunctionBegin;
  mumps->matstruc = DIFFERENT_NONZERO_PATTERN;

  /* Set MUMPS options from the options database */
  ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr);

  ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr);
  ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr);

  /* analysis phase */
  /*----------------*/
  mumps->id.job = JOB_FACTSYMBOLIC;
  mumps->id.n   = M;
  switch (mumps->id.ICNTL(18)) {
  case 0: /* centralized assembled matrix input */
    if (!mumps->myid) {
      mumps->id.nz = mumps->nz; mumps->id.irn = mumps->irn; mumps->id.jcn = mumps->jcn;
      if (mumps->id.ICNTL(6)>1) {
        mumps->id.a = (MumpsScalar*)mumps->val;
      }
      if (mumps->id.ICNTL(7) == 1) { /* use user-provided matrix ordering - assuming r = c ordering */
        /*
        PetscBool flag;
        ierr = ISEqual(r,c,&flag);CHKERRQ(ierr);
        if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"row_perm != col_perm");
        ierr = ISView(r,PETSC_VIEWER_STDOUT_SELF);
        */
        if (!mumps->myid) {
          const PetscInt *idx;
          PetscInt       i,*perm_in;

          ierr = PetscMalloc1(M,&perm_in);CHKERRQ(ierr);
          ierr = ISGetIndices(r,&idx);CHKERRQ(ierr);

          mumps->id.perm_in = perm_in;
          for (i=0; i<M; i++) perm_in[i] = idx[i]+1; /* perm_in[]: start from 1, not 0! */
          ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr);
        }
      }
    }
    break;
  case 3: /* distributed assembled matrix input (size>1) */
    mumps->id.nz_loc  = mumps->nz;
    mumps->id.irn_loc = mumps->irn; mumps->id.jcn_loc = mumps->jcn;
    if (mumps->id.ICNTL(6)>1) {
      mumps->id.a_loc = (MumpsScalar*)mumps->val;
    }
    /* MUMPS only supports a centralized rhs. Create the scatter scat_rhs for repeated use in MatSolve() */
    ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr);
    ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr);
    ierr = VecDestroy(&b);CHKERRQ(ierr);
    break;
  }
  PetscMUMPS_c(mumps);
  ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr);

  F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
  F->ops->solve             = MatSolve_MUMPS;
  F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
  F->ops->matsolve          = MatMatSolve_MUMPS;
  F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
  PetscFunctionReturn(0);
}
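
/*
   A minimal sketch of how the routines above are reached through the public Mat API
   without going through KSP. The user matrix A, the vectors b and x, and the omitted
   error checking are assumptions; the orderings are ignored unless ICNTL(7)==1 (see
   the note before MatLUFactorSymbolic_AIJMUMPS):

     Mat           F;
     IS            rowperm,colperm;
     MatFactorInfo finfo;
     ierr = MatFactorInfoInitialize(&finfo);CHKERRQ(ierr);
     ierr = MatGetOrdering(A,MATORDERINGNATURAL,&rowperm,&colperm);CHKERRQ(ierr);
     ierr = MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);CHKERRQ(ierr);
     ierr = MatLUFactorSymbolic(F,A,rowperm,colperm,&finfo);CHKERRQ(ierr);
     ierr = MatLUFactorNumeric(F,A,&finfo);CHKERRQ(ierr);
     ierr = MatSolve(F,b,x);CHKERRQ(ierr);
*/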

/* Note the PETSc r and c permutations are ignored */
PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
  PetscErrorCode ierr;
  Vec            b;
  const PetscInt M = A->rmap->N;

  PetscFunctionBegin;
  mumps->matstruc = DIFFERENT_NONZERO_PATTERN;

  /* Set MUMPS options from the options database */
  ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr);

  ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr);
  ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr);

  /* analysis phase */
  /*----------------*/
  mumps->id.job = JOB_FACTSYMBOLIC;
  mumps->id.n   = M;
  switch (mumps->id.ICNTL(18)) {
  case 0: /* centralized assembled matrix input */
    if (!mumps->myid) {
      mumps->id.nz = mumps->nz; mumps->id.irn = mumps->irn; mumps->id.jcn = mumps->jcn;
      if (mumps->id.ICNTL(6)>1) {
        mumps->id.a = (MumpsScalar*)mumps->val;
      }
    }
    break;
  case 3: /* distributed assembled matrix input (size>1) */
    mumps->id.nz_loc  = mumps->nz;
    mumps->id.irn_loc = mumps->irn; mumps->id.jcn_loc = mumps->jcn;
    if (mumps->id.ICNTL(6)>1) {
      mumps->id.a_loc = (MumpsScalar*)mumps->val;
    }
    /* MUMPS only supports a centralized rhs. Create the scatter scat_rhs for repeated use in MatSolve() */
    ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr);
    ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr);
    ierr = VecDestroy(&b);CHKERRQ(ierr);
    break;
  }
  PetscMUMPS_c(mumps);
  ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr);

  F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
  F->ops->solve           = MatSolve_MUMPS;
  F->ops->solvetranspose  = MatSolveTranspose_MUMPS;
  PetscFunctionReturn(0);
}

/* Note the PETSc r permutation and factor info are ignored */
PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info)
{
  Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
  PetscErrorCode ierr;
  Vec            b;
  const PetscInt M = A->rmap->N;

  PetscFunctionBegin;
  mumps->matstruc = DIFFERENT_NONZERO_PATTERN;

  /* Set MUMPS options from the options database */
  ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr);

  ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr);
  ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr);

  /* analysis phase */
  /*----------------*/
  mumps->id.job = JOB_FACTSYMBOLIC;
  mumps->id.n   = M;
  switch (mumps->id.ICNTL(18)) {
  case 0: /* centralized assembled matrix input */
    if (!mumps->myid) {
      mumps->id.nz = mumps->nz; mumps->id.irn = mumps->irn; mumps->id.jcn = mumps->jcn;
      if (mumps->id.ICNTL(6)>1) {
        mumps->id.a = (MumpsScalar*)mumps->val;
      }
    }
    break;
  case 3: /* distributed assembled matrix input (size>1) */
    mumps->id.nz_loc  = mumps->nz;
    mumps->id.irn_loc = mumps->irn; mumps->id.jcn_loc = mumps->jcn;
    if (mumps->id.ICNTL(6)>1) {
      mumps->id.a_loc = (MumpsScalar*)mumps->val;
    }
    /* MUMPS only supports a centralized rhs. Create the scatter scat_rhs for repeated use in MatSolve() */
    ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr);
    ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr);
    ierr = VecDestroy(&b);CHKERRQ(ierr);
    break;
  }
  PetscMUMPS_c(mumps);
  ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr);

  F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
  F->ops->solve                 = MatSolve_MUMPS;
  F->ops->solvetranspose        = MatSolve_MUMPS;
  F->ops->matsolve              = MatMatSolve_MUMPS;
  F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
#if defined(PETSC_USE_COMPLEX)
  F->ops->getinertia = NULL;
#else
  F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;
  Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;

  PetscFunctionBegin;
  /* check if matrix is mumps type */
  if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0);

  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",mumps->id.sym);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",mumps->id.par);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d \n",mumps->id.ICNTL(1));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d \n",mumps->id.ICNTL(2));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d \n",mumps->id.ICNTL(3));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",mumps->id.ICNTL(4));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",mumps->id.ICNTL(5));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",mumps->id.ICNTL(6));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d \n",mumps->id.ICNTL(7));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d \n",mumps->id.ICNTL(8));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",mumps->id.ICNTL(10));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",mumps->id.ICNTL(11));CHKERRQ(ierr);
      if (mumps->id.ICNTL(11)>0) {
        ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr);
      }
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",mumps->id.ICNTL(12));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (efficiency control): %d \n",mumps->id.ICNTL(13));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",mumps->id.ICNTL(14));CHKERRQ(ierr);
      /* ICNTL(15-17) not used */
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",mumps->id.ICNTL(18));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d \n",mumps->id.ICNTL(19));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(20) (rhs sparse pattern): %d \n",mumps->id.ICNTL(20));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d \n",mumps->id.ICNTL(21));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d \n",mumps->id.ICNTL(22));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory that can be allocated locally):%d \n",mumps->id.ICNTL(23));CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d \n",mumps->id.ICNTL(24));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d \n",mumps->id.ICNTL(25));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for rhs or solution): %d \n",mumps->id.ICNTL(26));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(27) (blocking size for multiple right-hand sides): %d \n",mumps->id.ICNTL(27));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d \n",mumps->id.ICNTL(28));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d \n",mumps->id.ICNTL(29));CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d \n",mumps->id.ICNTL(30));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(31) (which factors may be discarded during factorization): %d \n",mumps->id.ICNTL(31));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d \n",mumps->id.ICNTL(33));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d \n",mumps->id.ICNTL(35));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(36) (choice of BLR factorization variant): %d \n",mumps->id.ICNTL(36));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(38) (estimated compression rate of LU factors): %d \n",mumps->id.ICNTL(38));CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," CNTL(7) (dropping parameter for BLR): %g \n",mumps->id.CNTL(7));CHKERRQ(ierr);

      /* information local to each processor */
      ierr = PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] %g \n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size (in MB) of MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] %d \n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer, " INFO(16) (size (in MB) of MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] %d \n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] %d \n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);

      if (mumps->ninfo && mumps->ninfo <= 80){
        PetscInt i;
        for (i=0; i<mumps->ninfo; i++){
          ierr = PetscViewerASCIIPrintf(viewer, " INFO(%d): \n",mumps->info[i]);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] %d \n",mumps->myid,mumps->id.INFO(mumps->info[i]));CHKERRQ(ierr);
          ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
        }
      }
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);

      if (!mumps->myid) { /* information from the host */
        ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr);

        ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(3));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(4));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",mumps->id.INFOG(5));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",mumps->id.INFOG(6));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively used after analysis): %d \n",mumps->id.INFOG(7));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",mumps->id.INFOG(8));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",mumps->id.INFOG(9));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space to store the matrix factors after factorization): %d \n",mumps->id.INFOG(10));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d \n",mumps->id.INFOG(11));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",mumps->id.INFOG(12));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",mumps->id.INFOG(13));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compresses after factorization): %d \n",mumps->id.INFOG(14));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",mumps->id.INFOG(15));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",mumps->id.INFOG(16));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",mumps->id.INFOG(17));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",mumps->id.INFOG(18));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",mumps->id.INFOG(19));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",mumps->id.INFOG(20));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",mumps->id.INFOG(21));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",mumps->id.INFOG(22));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",mumps->id.INFOG(23));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",mumps->id.INFOG(24));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",mumps->id.INFOG(25));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(36));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d \n",mumps->id.INFOG(37));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(38));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d \n",mumps->id.INFOG(39));CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)A->data;

  PetscFunctionBegin;
  info->block_size        = 1.0;
  info->nz_allocated      = mumps->id.INFOG(20);
  info->nz_used           = mumps->id.INFOG(20);
  info->nz_unneeded       = 0.0;
  info->assemblies        = 0.0;
  info->mallocs           = 0.0;
  info->memory            = 0.0;
  info->fill_ratio_given  = 0;
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------------------------*/
PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
{
  Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
  const PetscInt *idxs;
  PetscInt       size,i;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr);
  if (mumps->petsc_size > 1) {
    PetscBool ls,gs; /* gs is false if any rank other than root has a non-empty IS */

    ls   = mumps->myid ? (size ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
    ierr = MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);CHKERRQ(ierr);
    if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported by PETSc\n");
  }
  if (mumps->id.size_schur != size) {
    ierr = PetscFree2(mumps->id.listvar_schur,mumps->id.schur);CHKERRQ(ierr);
    mumps->id.size_schur = size;
    mumps->id.schur_lld  = size;
    ierr = PetscMalloc2(size,&mumps->id.listvar_schur,size*size,&mumps->id.schur);CHKERRQ(ierr);
  }

  /* Schur complement matrix */
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.size_schur,(PetscScalar*)mumps->id.schur,&F->schur);CHKERRQ(ierr);
  if (mumps->sym == 1) {
    ierr = MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);
  }

  /* MUMPS expects Fortran style indices */
  ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr);
  ierr = PetscArraycpy(mumps->id.listvar_schur,idxs,size);CHKERRQ(ierr);
  for (i=0;i<size;i++) mumps->id.listvar_schur[i]++;
  ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr);
  if (mumps->petsc_size > 1) {
    mumps->id.ICNTL(19) = 1; /* MUMPS returns the Schur complement centralized on the host */
  } else {
    if (F->factortype == MAT_FACTOR_LU) {
      mumps->id.ICNTL(19) = 3; /* MUMPS returns the full matrix */
    } else {
      mumps->id.ICNTL(19) = 2; /* MUMPS returns the lower triangular part */
    }
  }
  /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
  mumps->id.ICNTL(26) = -1;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------------------------*/
PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S)
{
  Mat            St;
  Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
  PetscScalar    *array;
#if defined(PETSC_USE_COMPLEX)
  PetscScalar    im = PetscSqrtScalar((PetscScalar)-1.0);
#endif
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
  ierr = MatCreate(PETSC_COMM_SELF,&St);CHKERRQ(ierr);
  ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);CHKERRQ(ierr);
  ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(St);CHKERRQ(ierr);
  ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr);
  if (!mumps->sym) { /* MUMPS always returns a full matrix */
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
      PetscInt i,j,N=mumps->id.size_schur;
      for (i=0;i<N;i++) {
        for (j=0;j<N;j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i*N+j];
#else
          PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
#endif
          array[j*N+i] = val;
        }
      }
    } else { /* stored by columns */
      ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr);
    }
  } else { /* either full or lower-triangular (not packed) */
    if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
      PetscInt i,j,N=mumps->id.size_schur;
      for (i=0;i<N;i++) {
        for (j=i;j<N;j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i*N+j];
#else
          PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
#endif
          array[i*N+j] = val;
          array[j*N+i] = val;
        }
      }
    } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
      ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr);
    } else { /* ICNTL(19) == 1 lower triangular stored by rows */
      PetscInt i,j,N=mumps->id.size_schur;
      for (i=0;i<N;i++) {
        for (j=0;j<i+1;j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i*N+j];
#else
          PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
#endif
          array[i*N+j] = val;
          array[j*N+i] = val;
        }
      }
    }
  }
  ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr);
  *S   = St;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------------------------*/
PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  mumps->id.ICNTL(icntl) = ival;
  PetscFunctionReturn(0);
}

PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  *ival = mumps->id.ICNTL(icntl);
  PetscFunctionReturn(0);
}

/*@
  MatMumpsSetIcntl - Set MUMPS parameter ICNTL()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
.  icntl - index of MUMPS parameter array ICNTL()
-  ival - value of MUMPS ICNTL(icntl)

  Options Database:
.   -mat_mumps_icntl_<icntl> <ival>
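
   Example (a sketch; assumes F was obtained with MatGetFactor() from the PETSc-MUMPS interface); to turn on the detection of null pivot rows, use
$     ierr = MatMumpsSetIcntl(F,24,1);CHKERRQ(ierr);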

   Level: beginner

   References:
.     MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidLogicalCollectiveInt(F,icntl,2);
  PetscValidLogicalCollectiveInt(F,ival,3);
  ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
  MatMumpsGetIcntl - Get MUMPS parameter ICNTL()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  icntl - index of MUMPS parameter array ICNTL()

  Output Parameter:
.  ival - value of MUMPS ICNTL(icntl)

   Level: beginner

   References:
.     MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidLogicalCollectiveInt(F,icntl,2);
  PetscValidIntPointer(ival,3);
  ierr = PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------------------------*/
PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  mumps->id.CNTL(icntl) = val;
  PetscFunctionReturn(0);
}

PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  *val = mumps->id.CNTL(icntl);
  PetscFunctionReturn(0);
}

/*@
  MatMumpsSetCntl - Set MUMPS parameter CNTL()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
.  icntl - index of MUMPS parameter array CNTL()
-  val - value of MUMPS CNTL(icntl)

  Options Database:
.   -mat_mumps_cntl_<icntl> <val>
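
   Example (a sketch; assumes F was obtained with MatGetFactor() from the PETSc-MUMPS interface); to set the absolute pivoting threshold, use
$     ierr = MatMumpsSetCntl(F,3,1.e-6);CHKERRQ(ierr);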

   Level: beginner

   References:
.     MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidLogicalCollectiveInt(F,icntl,2);
  PetscValidLogicalCollectiveReal(F,val,3);
  ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
  MatMumpsGetCntl - Get MUMPS parameter CNTL()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  icntl - index of MUMPS parameter array CNTL()

  Output Parameter:
.  val - value of MUMPS CNTL(icntl)

   Level: beginner

   References:
.     MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidLogicalCollectiveInt(F,icntl,2);
  PetscValidRealPointer(val,3);
  ierr = PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  *info = mumps->id.INFO(icntl);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  *infog = mumps->id.INFOG(icntl);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  *rinfo = mumps->id.RINFO(icntl);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  *rinfog = mumps->id.RINFOG(icntl);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS)
{
  PetscErrorCode ierr;
  Mat            Bt = NULL,Btseq = NULL;
  PetscBool      flg;
  Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
  PetscScalar    *aa;
  PetscInt       spnr,*ia,*ja;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(spRHS,MAT_CLASSID,2);
  ierr = PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatTransposeGetMat(spRHS,&Bt);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be of type MATTRANSPOSEMAT");

  ierr = MatMumpsSetIcntl(F,30,1);CHKERRQ(ierr);

  if (mumps->petsc_size > 1) {
    Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data;
    Btseq = b->A;
  } else {
    Btseq = Bt;
  }

  if (!mumps->myid) {
    /* MUMPS reads the sparse rhs only on the host: pass the 1-based CSR structure of the sequential block of Bt */
    ierr = MatSeqAIJGetArray(Btseq,&aa);CHKERRQ(ierr);
    ierr = MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");

    mumps->id.irhs_ptr    = ia;
    mumps->id.irhs_sparse = ja;
    mumps->id.nz_rhs      = ia[spnr] - 1;
    mumps->id.rhs_sparse  = (MumpsScalar*)aa;
  } else {
    mumps->id.irhs_ptr    = NULL;
    mumps->id.irhs_sparse = NULL;
    mumps->id.nz_rhs      = 0;
    mumps->id.rhs_sparse  = NULL;
  }
  mumps->id.ICNTL(20) = 1; /* rhs is sparse */
  mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */

  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0)
    SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));

  if (!mumps->myid) {
    ierr = MatSeqAIJRestoreArray(Btseq,&aa);CHKERRQ(ierr);
    ierr = MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
  }
  PetscFunctionReturn(0);
}

/*@
  MatMumpsGetInverse - Get user-specified set of entries in inverse of A

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0]

  Output Parameter:
. spRHS - requested entries of inverse of A
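
   Example (a sketch; the factor F and the AIJ matrix spRHST, whose nonzero pattern on process 0 selects the wanted entries of inv(A), are assumptions):
$     ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr);
$     ierr = MatMumpsGetInverse(F,spRHS);CHKERRQ(ierr);
$     ierr = MatDestroy(&spRHS);CHKERRQ(ierr);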

   Level: beginner

   References:
.     MUMPS Users' Guide

.seealso: MatGetFactor(), MatCreateTranspose()
@*/
PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST)
{
  PetscErrorCode ierr;
  Mat            spRHS;

  PetscFunctionBegin;
  ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr);
  ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr);
  ierr = MatDestroy(&spRHS);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
  MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0]

  Output Parameter:
. spRHST - requested entries of inverse of A^T

   Level: beginner

   References:
.     MUMPS Users' Guide

.seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse()
@*/
PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST)
{
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  ierr = PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be a MATAIJ matrix");

  ierr = PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
  MatMumpsGetInfo - Get MUMPS parameter INFO()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  icntl - index of MUMPS parameter array INFO()

  Output Parameter:
.  ival - value of MUMPS INFO(icntl)

   Level: beginner

   References:
.     MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidIntPointer(ival,3);
  ierr = PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
  MatMumpsGetInfog - Get MUMPS parameter INFOG()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  icntl - index of MUMPS parameter array INFOG()

  Output Parameter:
.  ival - value of MUMPS INFOG(icntl)
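
   Example (a sketch; assumes the factorization held by F has just failed):
$     PetscInt errcode;
$     ierr = MatMumpsGetInfog(F,1,&errcode);CHKERRQ(ierr);
   A negative INFOG(1) encodes the MUMPS error, see the MUMPS Users' Guide.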
MUMPS Users' Guide 2396 2397 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2398 @*/ 2399 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val) 2400 { 2401 PetscErrorCode ierr; 2402 2403 PetscFunctionBegin; 2404 PetscValidType(F,1); 2405 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2406 PetscValidRealPointer(val,3); 2407 ierr = PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2408 PetscFunctionReturn(0); 2409 } 2410 2411 /*@ 2412 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 2413 2414 Logically Collective on Mat 2415 2416 Input Parameters: 2417 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2418 - icntl - index of MUMPS parameter array RINFOG() 2419 2420 Output Parameter: 2421 . val - value of MUMPS RINFOG(icntl) 2422 2423 Level: beginner 2424 2425 References: 2426 . MUMPS Users' Guide 2427 2428 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2429 @*/ 2430 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val) 2431 { 2432 PetscErrorCode ierr; 2433 2434 PetscFunctionBegin; 2435 PetscValidType(F,1); 2436 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2437 PetscValidRealPointer(val,3); 2438 ierr = PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2439 PetscFunctionReturn(0); 2440 } 2441 2442 /*MC 2443 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 2444 distributed and sequential matrices via the external package MUMPS. 2445 2446 Works with MATAIJ and MATSBAIJ matrices 2447 2448 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 2449 2450 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below. 2451 2452 Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver 2453 2454 Options Database Keys: 2455 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 2456 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 2457 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 2458 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 2459 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 2460 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis 2461 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 2462 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 2463 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 2464 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 2465 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 2466 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 2467 . 
-mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 2468 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 2469 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can allocate per processor 2470 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 2471 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 2472 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix 2473 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering 2474 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 2475 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 2476 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 2477 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 2478 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 2479 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 2480 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 2481 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 2482 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 2483 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 2484 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 2485 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 2486 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 2487 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 2488 Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 2489 2490 Level: beginner 2491 2492 Notes: 2493 MUMPS Cholesky does not handle (complex) Hermitian matrices http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf so using it will error if the matrix is Hermitian. 2494 2495 When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling 2496 $ KSPGetPC(ksp,&pc); 2497 $ PCFactorGetMatrix(pc,&mat); 2498 $ MatMumpsGetInfo(mat,....); 2499 $ MatMumpsGetInfog(mat,....); etc. 2500 Or you can run with -ksp_error_if_not_converged and the program will be stopped and the information printed in the error message. 2501 2502 Two modes to run MUMPS/PETSc with OpenMP 2503 2504 $ Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 2505 $ threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". 2506 2507 $ -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example, 2508 $ if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 2509 2510 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 2511 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc 2512 (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). 
Since MUMPS calls BLAS 2513 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 2514 (PETSc will automatically try to utilized a threaded BLAS if --with-openmp is provided). 2515 2516 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 2517 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 2518 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 2519 are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set 2520 by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs. 2521 In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets, 2522 if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind 2523 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half 2524 cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 2525 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 2526 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 2527 examine the mapping result. 2528 2529 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts, 2530 for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc 2531 calls omp_set_num_threads(m) internally before calling MUMPS. 2532 2533 References: 2534 + 1. - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 2535 - 2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017. 
2536 2537 .seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCGetFactor(), PCFactorGetMatrix() 2538 2539 M*/ 2540 2541 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type) 2542 { 2543 PetscFunctionBegin; 2544 *type = MATSOLVERMUMPS; 2545 PetscFunctionReturn(0); 2546 } 2547 2548 /* MatGetFactor for Seq and MPI AIJ matrices */ 2549 static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F) 2550 { 2551 Mat B; 2552 PetscErrorCode ierr; 2553 Mat_MUMPS *mumps; 2554 PetscBool isSeqAIJ; 2555 2556 PetscFunctionBegin; 2557 /* Create the factorization matrix */ 2558 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr); 2559 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2560 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2561 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2562 ierr = MatSetUp(B);CHKERRQ(ierr); 2563 2564 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2565 2566 B->ops->view = MatView_MUMPS; 2567 B->ops->getinfo = MatGetInfo_MUMPS; 2568 2569 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2570 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2571 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2572 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2573 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2574 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2575 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2576 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2577 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2578 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2579 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2580 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2581 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2582 2583 if (ftype == MAT_FACTOR_LU) { 2584 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 2585 B->factortype = MAT_FACTOR_LU; 2586 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 2587 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 2588 mumps->sym = 0; 2589 } else { 2590 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 2591 B->factortype = MAT_FACTOR_CHOLESKY; 2592 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 2593 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 2594 #if defined(PETSC_USE_COMPLEX) 2595 mumps->sym = 2; 2596 #else 2597 if (A->spd_set && A->spd) mumps->sym = 1; 2598 else 
      mumps->sym = 2;
#endif
  }

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}

/* MatGetFactor for Seq and MPI SBAIJ matrices */
static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqSBAIJ;

  PetscFunctionBegin;
  if (ftype != MAT_FACTOR_CHOLESKY) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Cannot use PETSc SBAIJ matrices with MUMPS LU, use AIJ matrix");
  if (A->rmap->bs > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Cannot use PETSc SBAIJ matrices with block size > 1 with MUMPS Cholesky, use AIJ matrix instead");
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr);
  /* Create the factorization matrix */
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
  if (isSeqSBAIJ) {
    mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
  } else {
    mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
  }

  B->ops->getinfo                = MatGetInfo_External;
  B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
  B->ops->view                   = MatView_MUMPS;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  B->factortype = MAT_FACTOR_CHOLESKY;
#if defined(PETSC_USE_COMPLEX)
  mumps->sym = 2;
#else
  if (A->spd_set && A->spd) mumps->sym = 1;
  else mumps->sym = 2;
#endif

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}

/* MatGetFactor for Seq and MPI BAIJ matrices */
static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqBAIJ;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
    mumps->sym = 0;
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");

  B->ops->getinfo = MatGetInfo_External;
  B->ops->view    = MatView_MUMPS;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}

/* MatGetFactor for Seq and MPI SELL matrices */
static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqSELL;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
    else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS LU for MPISELL matrices is not yet implemented");
    mumps->sym = 0;
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS Cholesky for SELL matrices is not yet implemented");

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}

PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
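
/*
   A minimal usage sketch (illustrative only, not part of this file's implementation): once
   MatSolverTypeRegister_MUMPS() has run, user code reaches the factories above through MatGetFactor().
   Here A, b and x are assumed to be a previously assembled AIJ matrix and conforming vectors.

     Mat           F;
     IS            rowperm,colperm;
     MatFactorInfo info;

     ierr = MatFactorInfoInitialize(&info);CHKERRQ(ierr);
     ierr = MatGetOrdering(A,MATORDERINGNATURAL,&rowperm,&colperm);CHKERRQ(ierr);
     ierr = MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);CHKERRQ(ierr);   / * dispatches to MatGetFactor_aij_mumps() * /
     ierr = MatLUFactorSymbolic(F,A,rowperm,colperm,&info);CHKERRQ(ierr);    / * MUMPS computes its own ordering * /
     ierr = MatLUFactorNumeric(F,A,&info);CHKERRQ(ierr);
     ierr = MatSolve(F,b,x);CHKERRQ(ierr);
     ierr = ISDestroy(&rowperm);CHKERRQ(ierr);
     ierr = ISDestroy(&colperm);CHKERRQ(ierr);
     ierr = MatDestroy(&F);CHKERRQ(ierr);

   More commonly, one selects this solver at run time on a KSP/PC object with
   -pc_type lu -pc_factor_mat_solver_type mumps.
*/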