1 2 /* 3 Provides an interface to the MUMPS sparse solver 4 */ 5 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 8 #include <../src/mat/impls/sell/mpi/mpisell.h> 9 10 EXTERN_C_BEGIN 11 #if defined(PETSC_USE_COMPLEX) 12 #if defined(PETSC_USE_REAL_SINGLE) 13 #include <cmumps_c.h> 14 #else 15 #include <zmumps_c.h> 16 #endif 17 #else 18 #if defined(PETSC_USE_REAL_SINGLE) 19 #include <smumps_c.h> 20 #else 21 #include <dmumps_c.h> 22 #endif 23 #endif 24 EXTERN_C_END 25 #define JOB_INIT -1 26 #define JOB_FACTSYMBOLIC 1 27 #define JOB_FACTNUMERIC 2 28 #define JOB_SOLVE 3 29 #define JOB_END -2 30 31 /* calls to MUMPS */ 32 #if defined(PETSC_USE_COMPLEX) 33 #if defined(PETSC_USE_REAL_SINGLE) 34 #define MUMPS_c cmumps_c 35 #else 36 #define MUMPS_c zmumps_c 37 #endif 38 #else 39 #if defined(PETSC_USE_REAL_SINGLE) 40 #define MUMPS_c smumps_c 41 #else 42 #define MUMPS_c dmumps_c 43 #endif 44 #endif 45 46 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */ 47 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 48 #define PetscMUMPS_c(mumps) \ 49 do { \ 50 if (mumps->use_petsc_omp_support) { \ 51 if (mumps->is_omp_master) { \ 52 ierr = PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl);CHKERRQ(ierr); \ 53 MUMPS_c(&mumps->id); \ 54 ierr = PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl);CHKERRQ(ierr); \ 55 } \ 56 ierr = PetscOmpCtrlBarrier(mumps->omp_ctrl);CHKERRQ(ierr); \ 57 } else { \ 58 MUMPS_c(&mumps->id); \ 59 } \ 60 } while(0) 61 #else 62 #define PetscMUMPS_c(mumps) \ 63 do { MUMPS_c(&mumps->id); } while (0) 64 #endif 65 66 /* declare MumpsScalar */ 67 #if defined(PETSC_USE_COMPLEX) 68 #if defined(PETSC_USE_REAL_SINGLE) 69 #define MumpsScalar mumps_complex 70 #else 71 #define MumpsScalar mumps_double_complex 72 #endif 73 #else 74 #define MumpsScalar PetscScalar 75 #endif 76 77 /* macros s.t. indices match MUMPS documentation */ 78 #define ICNTL(I) icntl[(I)-1] 79 #define CNTL(I) cntl[(I)-1] 80 #define INFOG(I) infog[(I)-1] 81 #define INFO(I) info[(I)-1] 82 #define RINFOG(I) rinfog[(I)-1] 83 #define RINFO(I) rinfo[(I)-1] 84 85 typedef struct { 86 #if defined(PETSC_USE_COMPLEX) 87 #if defined(PETSC_USE_REAL_SINGLE) 88 CMUMPS_STRUC_C id; 89 #else 90 ZMUMPS_STRUC_C id; 91 #endif 92 #else 93 #if defined(PETSC_USE_REAL_SINGLE) 94 SMUMPS_STRUC_C id; 95 #else 96 DMUMPS_STRUC_C id; 97 #endif 98 #endif 99 100 MatStructure matstruc; 101 PetscMPIInt myid,petsc_size; 102 PetscInt *irn,*jcn,nz,sym; 103 PetscScalar *val; 104 MPI_Comm mumps_comm; 105 PetscInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 106 VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 107 Vec b_seq,x_seq; 108 PetscInt ninfo,*info; /* display INFO */ 109 PetscInt sizeredrhs; 110 PetscScalar *schur_sol; 111 PetscInt schur_sizesol; 112 113 PetscBool use_petsc_omp_support; 114 PetscOmpCtrl omp_ctrl; /* an OpenMP controler that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 115 MPI_Comm petsc_comm,omp_comm; /* petsc_comm is petsc matrix's comm */ 116 PetscMPIInt mpinz; /* on master rank, nz = sum(mpinz) over omp_comm; on other ranks, mpinz = nz*/ 117 PetscMPIInt omp_comm_size; 118 PetscBool is_omp_master; /* is this rank the master of omp_comm */ 119 PetscMPIInt *recvcount,*displs; 120 121 PetscErrorCode (*ConvertToTriples)(Mat, int, MatReuse, int*, int**, int**, PetscScalar**); 122 } Mat_MUMPS; 123 124 extern PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*); 125 126 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps) 127 { 128 PetscErrorCode ierr; 129 130 PetscFunctionBegin; 131 ierr = PetscFree2(mumps->id.listvar_schur,mumps->id.schur);CHKERRQ(ierr); 132 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 133 ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr); 134 mumps->id.size_schur = 0; 135 mumps->id.schur_lld = 0; 136 mumps->id.ICNTL(19) = 0; 137 PetscFunctionReturn(0); 138 } 139 140 /* solve with rhs in mumps->id.redrhs and return in the same location */ 141 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 142 { 143 Mat_MUMPS *mumps=(Mat_MUMPS*)F->data; 144 Mat S,B,X; 145 MatFactorSchurStatus schurstatus; 146 PetscInt sizesol; 147 PetscErrorCode ierr; 148 149 PetscFunctionBegin; 150 ierr = MatFactorFactorizeSchurComplement(F);CHKERRQ(ierr); 151 ierr = MatFactorGetSchurComplement(F,&S,&schurstatus);CHKERRQ(ierr); 152 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);CHKERRQ(ierr); 153 switch (schurstatus) { 154 case MAT_FACTOR_SCHUR_FACTORED: 155 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);CHKERRQ(ierr); 156 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 157 ierr = MatMatSolveTranspose(S,B,X);CHKERRQ(ierr); 158 } else { 159 ierr = MatMatSolve(S,B,X);CHKERRQ(ierr); 160 } 161 break; 162 case MAT_FACTOR_SCHUR_INVERTED: 163 sizesol = mumps->id.nrhs*mumps->id.size_schur; 164 if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 165 ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr); 166 ierr = PetscMalloc1(sizesol,&mumps->schur_sol);CHKERRQ(ierr); 167 mumps->schur_sizesol = sizesol; 168 } 169 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);CHKERRQ(ierr); 170 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 171 ierr = MatTransposeMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr); 172 } else { 173 ierr = MatMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr); 174 } 175 ierr = MatCopy(X,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 176 break; 177 default: 178 SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %D",F->schur_status); 179 break; 180 } 181 ierr = MatFactorRestoreSchurComplement(F,&S,schurstatus);CHKERRQ(ierr); 182 ierr = MatDestroy(&B);CHKERRQ(ierr); 183 ierr = MatDestroy(&X);CHKERRQ(ierr); 184 PetscFunctionReturn(0); 185 } 186 187 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 188 { 189 Mat_MUMPS *mumps=(Mat_MUMPS*)F->data; 190 PetscErrorCode ierr; 191 192 PetscFunctionBegin; 193 if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 194 PetscFunctionReturn(0); 195 } 196 if (!expansion) { /* prepare for the condensation step */ 197 PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur; 198 /* allocate MUMPS internal array to store reduced right-hand sides */ 199 if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 200 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 201 mumps->id.lredrhs = mumps->id.size_schur; 202 ierr = PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);CHKERRQ(ierr); 203 mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs; 204 } 205 mumps->id.ICNTL(26) = 1; /* condensation phase */ 206 } else { /* prepare for the expansion step */ 207 /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */ 208 ierr = MatMumpsSolveSchur_Private(F);CHKERRQ(ierr); 209 mumps->id.ICNTL(26) = 2; /* expansion phase */ 210 PetscMUMPS_c(mumps); 211 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 212 /* restore defaults */ 213 mumps->id.ICNTL(26) = -1; 214 /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 215 if (mumps->id.nrhs > 1) { 216 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 217 mumps->id.lredrhs = 0; 218 mumps->sizeredrhs = 0; 219 } 220 } 221 PetscFunctionReturn(0); 222 } 223 224 /* 225 MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz] 226 227 input: 228 A - matrix in aij,baij or sbaij (bs=1) format 229 shift - 0: C style output triple; 1: Fortran style output triple. 230 reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 231 MAT_REUSE_MATRIX: only the values in v array are updated 232 output: 233 nnz - dim of r, c, and v (number of local nonzero entries of A) 234 r, c, v - row and col index, matrix values (matrix triples) 235 236 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 237 freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 238 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 239 240 */ 241 242 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 243 { 244 const PetscInt *ai,*aj,*ajj,M=A->rmap->n; 245 PetscInt nz,rnz,i,j; 246 PetscErrorCode ierr; 247 PetscInt *row,*col; 248 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 249 250 PetscFunctionBegin; 251 *v=aa->a; 252 if (reuse == MAT_INITIAL_MATRIX) { 253 nz = aa->nz; 254 ai = aa->i; 255 aj = aa->j; 256 *nnz = nz; 257 ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr); 258 col = row + nz; 259 260 nz = 0; 261 for (i=0; i<M; i++) { 262 rnz = ai[i+1] - ai[i]; 263 ajj = aj + ai[i]; 264 for (j=0; j<rnz; j++) { 265 row[nz] = i+shift; col[nz++] = ajj[j] + shift; 266 } 267 } 268 *r = row; *c = col; 269 } 270 PetscFunctionReturn(0); 271 } 272 273 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 274 { 275 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 276 PetscInt *ptr; 277 278 PetscFunctionBegin; 279 *v = a->val; 280 if (reuse == MAT_INITIAL_MATRIX) { 281 PetscInt nz,i,j,row; 282 PetscErrorCode ierr; 283 284 nz = a->sliidx[a->totalslices]; 285 *nnz = nz; 286 ierr = PetscMalloc1(2*nz, &ptr);CHKERRQ(ierr); 287 *r = ptr; 288 *c = ptr + nz; 289 290 for (i=0; i<a->totalslices; i++) { 291 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 292 *ptr++ = 8*i + row + shift; 293 } 294 } 295 for (i=0;i<nz;i++) *ptr++ = a->colidx[i] + shift; 296 } 297 PetscFunctionReturn(0); 298 } 299 300 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 301 { 302 Mat_SeqBAIJ *aa=(Mat_SeqBAIJ*)A->data; 303 const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2; 304 PetscInt bs,M,nz,idx=0,rnz,i,j,k,m; 305 PetscErrorCode ierr; 306 PetscInt *row,*col; 307 308 PetscFunctionBegin; 309 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 310 M = A->rmap->N/bs; 311 *v = aa->a; 312 if (reuse == MAT_INITIAL_MATRIX) { 313 ai = aa->i; aj = aa->j; 314 nz = bs2*aa->nz; 315 *nnz = nz; 316 ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr); 317 col = row + nz; 318 319 for (i=0; i<M; i++) { 320 ajj = aj + ai[i]; 321 rnz = ai[i+1] - ai[i]; 322 for (k=0; k<rnz; k++) { 323 for (j=0; j<bs; j++) { 324 for (m=0; m<bs; m++) { 325 row[idx] = i*bs + m + shift; 326 col[idx++] = bs*(ajj[k]) + j + shift; 327 } 328 } 329 } 330 } 331 *r = row; *c = col; 332 } 333 PetscFunctionReturn(0); 334 } 335 336 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 337 { 338 const PetscInt *ai, *aj,*ajj,M=A->rmap->n; 339 PetscInt nz,rnz,i,j; 340 PetscErrorCode ierr; 341 PetscInt *row,*col; 342 Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)A->data; 343 #if defined(PETSC_USE_COMPLEX) 344 PetscBool hermitian; 345 #endif 346 347 PetscFunctionBegin; 348 #if defined(PETSC_USE_COMPLEX) 349 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 350 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 351 #endif 352 *v = aa->a; 353 if (reuse == MAT_INITIAL_MATRIX) { 354 nz = aa->nz; 355 ai = aa->i; 356 aj = aa->j; 357 *v = aa->a; 358 *nnz = nz; 359 ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr); 360 col = row + nz; 361 362 nz = 0; 363 for (i=0; i<M; i++) { 364 rnz = ai[i+1] - ai[i]; 365 ajj = aj + ai[i]; 366 for (j=0; j<rnz; j++) { 367 row[nz] = i+shift; col[nz++] = ajj[j] + shift; 368 } 369 } 370 *r = row; *c = col; 371 } 372 PetscFunctionReturn(0); 373 } 374 375 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 376 { 377 const PetscInt *ai,*aj,*ajj,*adiag,M=A->rmap->n; 378 PetscInt nz,rnz,i,j; 379 const PetscScalar *av,*v1; 380 PetscScalar *val; 381 PetscErrorCode ierr; 382 PetscInt *row,*col; 383 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 384 PetscBool missing; 385 #if defined(PETSC_USE_COMPLEX) 386 PetscBool hermitian; 387 #endif 388 389 PetscFunctionBegin; 390 #if defined(PETSC_USE_COMPLEX) 391 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 392 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 393 #endif 394 ai = aa->i; aj = aa->j; av = aa->a; 395 adiag = aa->diag; 396 ierr = MatMissingDiagonal_SeqAIJ(A,&missing,&i);CHKERRQ(ierr); 397 if (reuse == MAT_INITIAL_MATRIX) { 398 /* count nz in the upper triangular part of A */ 399 nz = 0; 400 if (missing) { 401 for (i=0; i<M; i++) { 402 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 403 for (j=ai[i];j<ai[i+1];j++) { 404 if (aj[j] < i) continue; 405 nz++; 406 } 407 } else { 408 nz += ai[i+1] - adiag[i]; 409 } 410 } 411 } else { 412 for (i=0; i<M; i++) nz += ai[i+1] - adiag[i]; 413 } 414 *nnz = nz; 415 416 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 417 col = row + nz; 418 val = (PetscScalar*)(col + nz); 419 420 nz = 0; 421 if (missing) { 422 for (i=0; i<M; i++) { 423 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 424 for (j=ai[i];j<ai[i+1];j++) { 425 if (aj[j] < i) continue; 426 row[nz] = i+shift; 427 col[nz] = aj[j]+shift; 428 val[nz] = av[j]; 429 nz++; 430 } 431 } else { 432 rnz = ai[i+1] - adiag[i]; 433 ajj = aj + adiag[i]; 434 v1 = av + adiag[i]; 435 for (j=0; j<rnz; j++) { 436 row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j]; 437 } 438 } 439 } 440 } else { 441 for (i=0; i<M; i++) { 442 rnz = ai[i+1] - adiag[i]; 443 ajj = aj + adiag[i]; 444 v1 = av + adiag[i]; 445 for (j=0; j<rnz; j++) { 446 row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j]; 447 } 448 } 449 } 450 *r = row; *c = col; *v = val; 451 } else { 452 nz = 0; val = *v; 453 if (missing) { 454 for (i=0; i <M; i++) { 455 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 456 for (j=ai[i];j<ai[i+1];j++) { 457 if (aj[j] < i) continue; 458 val[nz++] = av[j]; 459 } 460 } else { 461 rnz = ai[i+1] - adiag[i]; 462 v1 = av + adiag[i]; 463 for (j=0; j<rnz; j++) { 464 val[nz++] = v1[j]; 465 } 466 } 467 } 468 } else { 469 for (i=0; i <M; i++) { 470 rnz = ai[i+1] - adiag[i]; 471 v1 = av + adiag[i]; 472 for (j=0; j<rnz; j++) { 473 val[nz++] = v1[j]; 474 } 475 } 476 } 477 } 478 PetscFunctionReturn(0); 479 } 480 481 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 482 { 483 const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 484 PetscErrorCode ierr; 485 PetscInt rstart,nz,i,j,jj,irow,countA,countB; 486 PetscInt *row,*col; 487 const PetscScalar *av, *bv,*v1,*v2; 488 PetscScalar *val; 489 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data; 490 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)(mat->A)->data; 491 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 492 #if defined(PETSC_USE_COMPLEX) 493 PetscBool hermitian; 494 #endif 495 496 PetscFunctionBegin; 497 #if defined(PETSC_USE_COMPLEX) 498 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 499 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 500 #endif 501 rstart = A->rmap->rstart; 502 ai = aa->i; 503 aj = aa->j; 504 bi = bb->i; 505 bj = bb->j; 506 av = aa->a; 507 bv = bb->a; 508 509 garray = mat->garray; 510 511 if (reuse == MAT_INITIAL_MATRIX) { 512 nz = aa->nz + bb->nz; 513 *nnz = nz; 514 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 515 col = row + nz; 516 val = (PetscScalar*)(col + nz); 517 518 *r = row; *c = col; *v = val; 519 } else { 520 row = *r; col = *c; val = *v; 521 } 522 523 jj = 0; irow = rstart; 524 for (i=0; i<m; i++) { 525 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 526 countA = ai[i+1] - ai[i]; 527 countB = bi[i+1] - bi[i]; 528 bjj = bj + bi[i]; 529 v1 = av + ai[i]; 530 v2 = bv + bi[i]; 531 532 /* A-part */ 533 for (j=0; j<countA; j++) { 534 if (reuse == MAT_INITIAL_MATRIX) { 535 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 536 } 537 val[jj++] = v1[j]; 538 } 539 540 /* B-part */ 541 for (j=0; j < countB; j++) { 542 if (reuse == MAT_INITIAL_MATRIX) { 543 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 544 } 545 val[jj++] = v2[j]; 546 } 547 irow++; 548 } 549 PetscFunctionReturn(0); 550 } 551 552 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 553 { 554 const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 555 PetscErrorCode ierr; 556 PetscInt rstart,nz,i,j,jj,irow,countA,countB; 557 PetscInt *row,*col; 558 const PetscScalar *av, *bv,*v1,*v2; 559 PetscScalar *val; 560 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 561 Mat_SeqAIJ *aa = (Mat_SeqAIJ*)(mat->A)->data; 562 Mat_SeqAIJ *bb = (Mat_SeqAIJ*)(mat->B)->data; 563 564 PetscFunctionBegin; 565 rstart = A->rmap->rstart; 566 ai = aa->i; 567 aj = aa->j; 568 bi = bb->i; 569 bj = bb->j; 570 av = aa->a; 571 bv = bb->a; 572 573 garray = mat->garray; 574 575 if (reuse == MAT_INITIAL_MATRIX) { 576 nz = aa->nz + bb->nz; 577 *nnz = nz; 578 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 579 col = row + nz; 580 val = (PetscScalar*)(col + nz); 581 582 *r = row; *c = col; *v = val; 583 } else { 584 row = *r; col = *c; val = *v; 585 } 586 587 jj = 0; irow = rstart; 588 for (i=0; i<m; i++) { 589 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 590 countA = ai[i+1] - ai[i]; 591 countB = bi[i+1] - bi[i]; 592 bjj = bj + bi[i]; 593 v1 = av + ai[i]; 594 v2 = bv + bi[i]; 595 596 /* A-part */ 597 for (j=0; j<countA; j++) { 598 if (reuse == MAT_INITIAL_MATRIX) { 599 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 600 } 601 val[jj++] = v1[j]; 602 } 603 604 /* B-part */ 605 for (j=0; j < countB; j++) { 606 if (reuse == MAT_INITIAL_MATRIX) { 607 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 608 } 609 val[jj++] = v2[j]; 610 } 611 irow++; 612 } 613 PetscFunctionReturn(0); 614 } 615 616 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 617 { 618 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)A->data; 619 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ*)(mat->A)->data; 620 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 621 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj; 622 const PetscInt *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart; 623 const PetscInt bs2=mat->bs2; 624 PetscErrorCode ierr; 625 PetscInt bs,nz,i,j,k,n,jj,irow,countA,countB,idx; 626 PetscInt *row,*col; 627 const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2; 628 PetscScalar *val; 629 630 PetscFunctionBegin; 631 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 632 if (reuse == MAT_INITIAL_MATRIX) { 633 nz = bs2*(aa->nz + bb->nz); 634 *nnz = nz; 635 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 636 col = row + nz; 637 val = (PetscScalar*)(col + nz); 638 639 *r = row; *c = col; *v = val; 640 } else { 641 row = *r; col = *c; val = *v; 642 } 643 644 jj = 0; irow = rstart; 645 for (i=0; i<mbs; i++) { 646 countA = ai[i+1] - ai[i]; 647 countB = bi[i+1] - bi[i]; 648 ajj = aj + ai[i]; 649 bjj = bj + bi[i]; 650 v1 = av + bs2*ai[i]; 651 v2 = bv + bs2*bi[i]; 652 653 idx = 0; 654 /* A-part */ 655 for (k=0; k<countA; k++) { 656 for (j=0; j<bs; j++) { 657 for (n=0; n<bs; n++) { 658 if (reuse == MAT_INITIAL_MATRIX) { 659 row[jj] = irow + n + shift; 660 col[jj] = rstart + bs*ajj[k] + j + shift; 661 } 662 val[jj++] = v1[idx++]; 663 } 664 } 665 } 666 667 idx = 0; 668 /* B-part */ 669 for (k=0; k<countB; k++) { 670 for (j=0; j<bs; j++) { 671 for (n=0; n<bs; n++) { 672 if (reuse == MAT_INITIAL_MATRIX) { 673 row[jj] = irow + n + shift; 674 col[jj] = bs*garray[bjj[k]] + j + shift; 675 } 676 val[jj++] = v2[idx++]; 677 } 678 } 679 } 680 irow += bs; 681 } 682 PetscFunctionReturn(0); 683 } 684 685 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 686 { 687 const PetscInt *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 688 PetscErrorCode ierr; 689 PetscInt rstart,nz,nza,nzb,i,j,jj,irow,countA,countB; 690 PetscInt *row,*col; 691 const PetscScalar *av, *bv,*v1,*v2; 692 PetscScalar *val; 693 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 694 Mat_SeqAIJ *aa =(Mat_SeqAIJ*)(mat->A)->data; 695 Mat_SeqAIJ *bb =(Mat_SeqAIJ*)(mat->B)->data; 696 #if defined(PETSC_USE_COMPLEX) 697 PetscBool hermitian; 698 #endif 699 700 PetscFunctionBegin; 701 #if defined(PETSC_USE_COMPLEX) 702 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 703 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 704 #endif 705 ai = aa->i; 706 aj = aa->j; 707 adiag = aa->diag; 708 bi = bb->i; 709 bj = bb->j; 710 garray = mat->garray; 711 av = aa->a; 712 bv = bb->a; 713 714 rstart = A->rmap->rstart; 715 716 if (reuse == MAT_INITIAL_MATRIX) { 717 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 718 nzb = 0; /* num of upper triangular entries in mat->B */ 719 for (i=0; i<m; i++) { 720 nza += (ai[i+1] - adiag[i]); 721 countB = bi[i+1] - bi[i]; 722 bjj = bj + bi[i]; 723 for (j=0; j<countB; j++) { 724 if (garray[bjj[j]] > rstart) nzb++; 725 } 726 } 727 728 nz = nza + nzb; /* total nz of upper triangular part of mat */ 729 *nnz = nz; 730 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 731 col = row + nz; 732 val = (PetscScalar*)(col + nz); 733 734 *r = row; *c = col; *v = val; 735 } else { 736 row = *r; col = *c; val = *v; 737 } 738 739 jj = 0; irow = rstart; 740 for (i=0; i<m; i++) { 741 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 742 v1 = av + adiag[i]; 743 countA = ai[i+1] - adiag[i]; 744 countB = bi[i+1] - bi[i]; 745 bjj = bj + bi[i]; 746 v2 = bv + bi[i]; 747 748 /* A-part */ 749 for (j=0; j<countA; j++) { 750 if (reuse == MAT_INITIAL_MATRIX) { 751 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 752 } 753 val[jj++] = v1[j]; 754 } 755 756 /* B-part */ 757 for (j=0; j < countB; j++) { 758 if (garray[bjj[j]] > rstart) { 759 if (reuse == MAT_INITIAL_MATRIX) { 760 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 761 } 762 val[jj++] = v2[j]; 763 } 764 } 765 irow++; 766 } 767 PetscFunctionReturn(0); 768 } 769 770 PetscErrorCode MatDestroy_MUMPS(Mat A) 771 { 772 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 773 PetscErrorCode ierr; 774 775 PetscFunctionBegin; 776 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 777 ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr); 778 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 779 ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr); 780 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 781 ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr); 782 ierr = PetscFree(mumps->irn);CHKERRQ(ierr); 783 ierr = PetscFree(mumps->info);CHKERRQ(ierr); 784 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 785 mumps->id.job = JOB_END; 786 PetscMUMPS_c(mumps); 787 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 788 if (mumps->use_petsc_omp_support) { ierr = PetscOmpCtrlDestroy(&mumps->omp_ctrl);CHKERRQ(ierr); } 789 #endif 790 ierr = PetscFree2(mumps->recvcount,mumps->displs);CHKERRQ(ierr); 791 ierr = PetscFree(A->data);CHKERRQ(ierr); 792 793 /* clear composed functions */ 794 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr); 795 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr); 796 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr); 797 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr); 798 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);CHKERRQ(ierr); 799 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr); 800 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);CHKERRQ(ierr); 801 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);CHKERRQ(ierr); 802 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);CHKERRQ(ierr); 803 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);CHKERRQ(ierr); 804 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);CHKERRQ(ierr); 805 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);CHKERRQ(ierr); 806 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);CHKERRQ(ierr); 807 PetscFunctionReturn(0); 808 } 809 810 PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x) 811 { 812 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 813 PetscScalar *array; 814 Vec b_seq; 815 IS is_iden,is_petsc; 816 PetscErrorCode ierr; 817 PetscInt i; 818 PetscBool second_solve = PETSC_FALSE; 819 static PetscBool cite1 = PETSC_FALSE,cite2 = PETSC_FALSE; 820 821 PetscFunctionBegin; 822 ierr = PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1);CHKERRQ(ierr); 823 ierr = PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",&cite2);CHKERRQ(ierr); 824 825 if (A->factorerrortype) { 826 ierr = PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 827 ierr = VecSetInf(x);CHKERRQ(ierr); 828 PetscFunctionReturn(0); 829 } 830 831 mumps->id.ICNTL(20)= 0; /* dense RHS */ 832 mumps->id.nrhs = 1; 833 b_seq = mumps->b_seq; 834 if (mumps->petsc_size > 1) { 835 /* MUMPS only supports centralized rhs. Scatter b into a seqential rhs vector */ 836 ierr = VecScatterBegin(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 837 ierr = VecScatterEnd(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 838 if (!mumps->myid) {ierr = VecGetArray(b_seq,&array);CHKERRQ(ierr);} 839 } else { /* petsc_size == 1 */ 840 ierr = VecCopy(b,x);CHKERRQ(ierr); 841 ierr = VecGetArray(x,&array);CHKERRQ(ierr); 842 } 843 if (!mumps->myid) { /* define rhs on the host */ 844 mumps->id.nrhs = 1; 845 mumps->id.rhs = (MumpsScalar*)array; 846 } 847 848 /* 849 handle condensation step of Schur complement (if any) 850 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 851 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 852 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 853 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 854 */ 855 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 856 if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n"); 857 second_solve = PETSC_TRUE; 858 ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr); 859 } 860 /* solve phase */ 861 /*-------------*/ 862 mumps->id.job = JOB_SOLVE; 863 PetscMUMPS_c(mumps); 864 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 865 866 /* handle expansion step of Schur complement (if any) */ 867 if (second_solve) { 868 ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr); 869 } 870 871 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 872 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 873 /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 874 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 875 } 876 if (!mumps->scat_sol) { /* create scatter scat_sol */ 877 ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */ 878 for (i=0; i<mumps->id.lsol_loc; i++) { 879 mumps->id.isol_loc[i] -= 1; /* change Fortran style to C style */ 880 } 881 ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,mumps->id.isol_loc,PETSC_COPY_VALUES,&is_petsc);CHKERRQ(ierr); /* to */ 882 ierr = VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr); 883 ierr = ISDestroy(&is_iden);CHKERRQ(ierr); 884 ierr = ISDestroy(&is_petsc);CHKERRQ(ierr); 885 886 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 887 } 888 889 ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 890 ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 891 } 892 893 if (mumps->petsc_size > 1) {if (!mumps->myid) {ierr = VecRestoreArray(b_seq,&array);CHKERRQ(ierr);}} 894 else {ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);} 895 896 ierr = PetscLogFlops(2.0*mumps->id.RINFO(3));CHKERRQ(ierr); 897 PetscFunctionReturn(0); 898 } 899 900 PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x) 901 { 902 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 903 PetscErrorCode ierr; 904 905 PetscFunctionBegin; 906 mumps->id.ICNTL(9) = 0; 907 ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr); 908 mumps->id.ICNTL(9) = 1; 909 PetscFunctionReturn(0); 910 } 911 912 PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X) 913 { 914 PetscErrorCode ierr; 915 Mat Bt = NULL; 916 PetscBool flg, flgT; 917 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 918 PetscInt i,nrhs,M; 919 PetscScalar *array; 920 const PetscScalar *rbray; 921 PetscInt lsol_loc,nlsol_loc,*isol_loc,*idxx,*isol_loc_save,iidx = 0; 922 PetscScalar *bray,*sol_loc,*sol_loc_save; 923 IS is_to,is_from; 924 PetscInt k,proc,j,m,myrstart; 925 const PetscInt *rstart; 926 Vec v_mpi,b_seq,msol_loc; 927 VecScatter scat_rhs,scat_sol; 928 PetscScalar *aa; 929 PetscInt spnr,*ia,*ja; 930 Mat_MPIAIJ *b = NULL; 931 932 PetscFunctionBegin; 933 ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 934 if (!flg) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix"); 935 936 ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 937 if (flg) { /* dense B */ 938 if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution"); 939 mumps->id.ICNTL(20)= 0; /* dense RHS */ 940 } else { /* sparse B */ 941 ierr = PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);CHKERRQ(ierr); 942 if (flgT) { /* input B is transpose of actural RHS matrix, 943 because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */ 944 ierr = MatTransposeGetMat(B,&Bt);CHKERRQ(ierr); 945 } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix"); 946 mumps->id.ICNTL(20)= 1; /* sparse RHS */ 947 } 948 949 ierr = MatGetSize(B,&M,&nrhs);CHKERRQ(ierr); 950 mumps->id.nrhs = nrhs; 951 mumps->id.lrhs = M; 952 mumps->id.rhs = NULL; 953 954 if (mumps->petsc_size == 1) { 955 PetscScalar *aa; 956 PetscInt spnr,*ia,*ja; 957 PetscBool second_solve = PETSC_FALSE; 958 959 ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr); 960 mumps->id.rhs = (MumpsScalar*)array; 961 962 if (!Bt) { /* dense B */ 963 /* copy B to X */ 964 ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr); 965 ierr = PetscArraycpy(array,rbray,M*nrhs);CHKERRQ(ierr); 966 ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr); 967 } else { /* sparse B */ 968 ierr = MatSeqAIJGetArray(Bt,&aa);CHKERRQ(ierr); 969 ierr = MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 970 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 971 /* mumps requires ia and ja start at 1! */ 972 mumps->id.irhs_ptr = ia; 973 mumps->id.irhs_sparse = ja; 974 mumps->id.nz_rhs = ia[spnr] - 1; 975 mumps->id.rhs_sparse = (MumpsScalar*)aa; 976 } 977 /* handle condensation step of Schur complement (if any) */ 978 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 979 second_solve = PETSC_TRUE; 980 ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr); 981 } 982 /* solve phase */ 983 /*-------------*/ 984 mumps->id.job = JOB_SOLVE; 985 PetscMUMPS_c(mumps); 986 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 987 988 /* handle expansion step of Schur complement (if any) */ 989 if (second_solve) { 990 ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr); 991 } 992 if (Bt) { /* sparse B */ 993 ierr = MatSeqAIJRestoreArray(Bt,&aa);CHKERRQ(ierr); 994 ierr = MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 995 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 996 } 997 ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr); 998 PetscFunctionReturn(0); 999 } 1000 1001 /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/ 1002 if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n"); 1003 1004 /* create msol_loc to hold mumps local solution */ 1005 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1006 sol_loc_save = (PetscScalar*)mumps->id.sol_loc; 1007 1008 lsol_loc = mumps->id.lsol_loc; 1009 nlsol_loc = nrhs*lsol_loc; /* length of sol_loc */ 1010 ierr = PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);CHKERRQ(ierr); 1011 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1012 mumps->id.isol_loc = isol_loc; 1013 1014 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);CHKERRQ(ierr); 1015 1016 if (!Bt) { /* dense B */ 1017 /* scatter v_mpi to b_seq because MUMPS only supports centralized rhs */ 1018 /* wrap dense rhs matrix B into a vector v_mpi */ 1019 ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr); 1020 ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr); 1021 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr); 1022 ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr); 1023 1024 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */ 1025 if (!mumps->myid) { 1026 PetscInt *idx; 1027 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1028 ierr = PetscMalloc1(nrhs*M,&idx);CHKERRQ(ierr); 1029 ierr = MatGetOwnershipRanges(B,&rstart);CHKERRQ(ierr); 1030 k = 0; 1031 for (proc=0; proc<mumps->petsc_size; proc++){ 1032 for (j=0; j<nrhs; j++){ 1033 for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i; 1034 } 1035 } 1036 1037 ierr = VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);CHKERRQ(ierr); 1038 ierr = ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr); 1039 ierr = ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);CHKERRQ(ierr); 1040 } else { 1041 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);CHKERRQ(ierr); 1042 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);CHKERRQ(ierr); 1043 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);CHKERRQ(ierr); 1044 } 1045 ierr = VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);CHKERRQ(ierr); 1046 ierr = VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1047 ierr = ISDestroy(&is_to);CHKERRQ(ierr); 1048 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1049 ierr = VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1050 1051 if (!mumps->myid) { /* define rhs on the host */ 1052 ierr = VecGetArray(b_seq,&bray);CHKERRQ(ierr); 1053 mumps->id.rhs = (MumpsScalar*)bray; 1054 ierr = VecRestoreArray(b_seq,&bray);CHKERRQ(ierr); 1055 } 1056 1057 } else { /* sparse B */ 1058 b = (Mat_MPIAIJ*)Bt->data; 1059 1060 /* wrap dense X into a vector v_mpi */ 1061 ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr); 1062 ierr = MatDenseGetArray(X,&bray);CHKERRQ(ierr); 1063 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr); 1064 ierr = MatDenseRestoreArray(X,&bray);CHKERRQ(ierr); 1065 1066 if (!mumps->myid) { 1067 ierr = MatSeqAIJGetArray(b->A,&aa);CHKERRQ(ierr); 1068 ierr = MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1069 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 1070 /* mumps requires ia and ja start at 1! */ 1071 mumps->id.irhs_ptr = ia; 1072 mumps->id.irhs_sparse = ja; 1073 mumps->id.nz_rhs = ia[spnr] - 1; 1074 mumps->id.rhs_sparse = (MumpsScalar*)aa; 1075 } else { 1076 mumps->id.irhs_ptr = NULL; 1077 mumps->id.irhs_sparse = NULL; 1078 mumps->id.nz_rhs = 0; 1079 mumps->id.rhs_sparse = NULL; 1080 } 1081 } 1082 1083 /* solve phase */ 1084 /*-------------*/ 1085 mumps->id.job = JOB_SOLVE; 1086 PetscMUMPS_c(mumps); 1087 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1088 1089 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1090 ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr); 1091 ierr = VecPlaceArray(v_mpi,array);CHKERRQ(ierr); 1092 1093 /* create scatter scat_sol */ 1094 ierr = MatGetOwnershipRanges(X,&rstart);CHKERRQ(ierr); 1095 /* iidx: index for scatter mumps solution to petsc X */ 1096 1097 ierr = ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);CHKERRQ(ierr); 1098 ierr = PetscMalloc1(nlsol_loc,&idxx);CHKERRQ(ierr); 1099 for (i=0; i<lsol_loc; i++) { 1100 isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1101 1102 for (proc=0; proc<mumps->petsc_size; proc++){ 1103 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) { 1104 myrstart = rstart[proc]; 1105 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1106 iidx = k + myrstart*nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1107 m = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */ 1108 break; 1109 } 1110 } 1111 1112 for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m; 1113 } 1114 ierr = ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr); 1115 ierr = VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);CHKERRQ(ierr); 1116 ierr = VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1117 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1118 ierr = ISDestroy(&is_to);CHKERRQ(ierr); 1119 ierr = VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1120 ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr); 1121 1122 /* free spaces */ 1123 mumps->id.sol_loc = (MumpsScalar*)sol_loc_save; 1124 mumps->id.isol_loc = isol_loc_save; 1125 1126 ierr = PetscFree2(sol_loc,isol_loc);CHKERRQ(ierr); 1127 ierr = PetscFree(idxx);CHKERRQ(ierr); 1128 ierr = VecDestroy(&msol_loc);CHKERRQ(ierr); 1129 ierr = VecDestroy(&v_mpi);CHKERRQ(ierr); 1130 if (Bt) { 1131 if (!mumps->myid) { 1132 b = (Mat_MPIAIJ*)Bt->data; 1133 ierr = MatSeqAIJRestoreArray(b->A,&aa);CHKERRQ(ierr); 1134 ierr = MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1135 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 1136 } 1137 } else { 1138 ierr = VecDestroy(&b_seq);CHKERRQ(ierr); 1139 ierr = VecScatterDestroy(&scat_rhs);CHKERRQ(ierr); 1140 } 1141 ierr = VecScatterDestroy(&scat_sol);CHKERRQ(ierr); 1142 ierr = PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));CHKERRQ(ierr); 1143 PetscFunctionReturn(0); 1144 } 1145 1146 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X) 1147 { 1148 PetscErrorCode ierr; 1149 PetscBool flg; 1150 Mat B; 1151 1152 PetscFunctionBegin; 1153 ierr = PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 1154 if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix"); 1155 1156 /* Create B=Bt^T that uses Bt's data structure */ 1157 ierr = MatCreateTranspose(Bt,&B);CHKERRQ(ierr); 1158 1159 ierr = MatMatSolve_MUMPS(A,B,X);CHKERRQ(ierr); 1160 ierr = MatDestroy(&B);CHKERRQ(ierr); 1161 PetscFunctionReturn(0); 1162 } 1163 1164 #if !defined(PETSC_USE_COMPLEX) 1165 /* 1166 input: 1167 F: numeric factor 1168 output: 1169 nneg: total number of negative pivots 1170 nzero: total number of zero pivots 1171 npos: (global dimension of F) - nneg - nzero 1172 */ 1173 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos) 1174 { 1175 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1176 PetscErrorCode ierr; 1177 PetscMPIInt size; 1178 1179 PetscFunctionBegin; 1180 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRQ(ierr); 1181 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1182 if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",mumps->id.INFOG(13)); 1183 1184 if (nneg) *nneg = mumps->id.INFOG(12); 1185 if (nzero || npos) { 1186 if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1187 if (nzero) *nzero = mumps->id.INFOG(28); 1188 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1189 } 1190 PetscFunctionReturn(0); 1191 } 1192 #endif 1193 1194 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps) 1195 { 1196 PetscErrorCode ierr; 1197 PetscInt i,nz=0,*irn,*jcn=0; 1198 PetscScalar *val=0; 1199 PetscMPIInt mpinz,*recvcount=NULL,*displs=NULL; 1200 1201 PetscFunctionBegin; 1202 if (mumps->omp_comm_size > 1) { 1203 if (reuse == MAT_INITIAL_MATRIX) { 1204 /* master first gathers counts of nonzeros to receive */ 1205 if (mumps->is_omp_master) { ierr = PetscMalloc2(mumps->omp_comm_size,&recvcount,mumps->omp_comm_size,&displs);CHKERRQ(ierr); } 1206 ierr = PetscMPIIntCast(mumps->nz,&mpinz);CHKERRQ(ierr); 1207 ierr = MPI_Gather(&mpinz,1,MPI_INT,recvcount,1,MPI_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1208 1209 /* master allocates memory to receive nonzeros */ 1210 if (mumps->is_omp_master) { 1211 displs[0] = 0; 1212 for (i=1; i<mumps->omp_comm_size; i++) displs[i] = displs[i-1] + recvcount[i-1]; 1213 nz = displs[mumps->omp_comm_size-1] + recvcount[mumps->omp_comm_size-1]; 1214 ierr = PetscMalloc(2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar),&irn);CHKERRQ(ierr); 1215 jcn = irn + nz; 1216 val = (PetscScalar*)(jcn + nz); 1217 } 1218 1219 /* save the gatherv plan */ 1220 mumps->mpinz = mpinz; /* used as send count */ 1221 mumps->recvcount = recvcount; 1222 mumps->displs = displs; 1223 1224 /* master gathers nonzeros */ 1225 ierr = MPI_Gatherv(mumps->irn,mpinz,MPIU_INT,irn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1226 ierr = MPI_Gatherv(mumps->jcn,mpinz,MPIU_INT,jcn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1227 ierr = MPI_Gatherv(mumps->val,mpinz,MPIU_SCALAR,val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1228 1229 /* master frees its row/col/val and replaces them with bigger arrays */ 1230 if (mumps->is_omp_master) { 1231 ierr = PetscFree(mumps->irn);CHKERRQ(ierr); /* irn/jcn/val are allocated together so free only irn */ 1232 mumps->nz = nz; /* it is a sum of mpinz over omp_comm */ 1233 mumps->irn = irn; 1234 mumps->jcn = jcn; 1235 mumps->val = val; 1236 } 1237 } else { 1238 ierr = MPI_Gatherv((mumps->is_omp_master?MPI_IN_PLACE:mumps->val),mumps->mpinz,MPIU_SCALAR,mumps->val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1239 } 1240 } 1241 PetscFunctionReturn(0); 1242 } 1243 1244 PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info) 1245 { 1246 Mat_MUMPS *mumps =(Mat_MUMPS*)(F)->data; 1247 PetscErrorCode ierr; 1248 PetscBool isMPIAIJ; 1249 1250 PetscFunctionBegin; 1251 if (mumps->id.INFOG(1) < 0) { 1252 if (mumps->id.INFOG(1) == -6) { 1253 ierr = PetscInfo2(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1254 } 1255 ierr = PetscInfo2(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1256 PetscFunctionReturn(0); 1257 } 1258 1259 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1260 ierr = MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);CHKERRQ(ierr); 1261 1262 /* numerical factorization phase */ 1263 /*-------------------------------*/ 1264 mumps->id.job = JOB_FACTNUMERIC; 1265 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1266 if (!mumps->myid) { 1267 mumps->id.a = (MumpsScalar*)mumps->val; 1268 } 1269 } else { 1270 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1271 } 1272 PetscMUMPS_c(mumps); 1273 if (mumps->id.INFOG(1) < 0) { 1274 if (A->erroriffailure) { 1275 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 1276 } else { 1277 if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */ 1278 ierr = PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1279 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1280 } else if (mumps->id.INFOG(1) == -13) { 1281 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1282 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1283 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10) ) { 1284 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray \n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1285 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1286 } else { 1287 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1288 F->factorerrortype = MAT_FACTOR_OTHER; 1289 } 1290 } 1291 } 1292 if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d\n",mumps->id.INFOG(16)); 1293 1294 F->assembled = PETSC_TRUE; 1295 mumps->matstruc = SAME_NONZERO_PATTERN; 1296 if (F->schur) { /* reset Schur status to unfactored */ 1297 #if defined(PETSC_HAVE_CUDA) 1298 F->schur->valid_GPU_matrix = PETSC_OFFLOAD_CPU; 1299 #endif 1300 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1301 mumps->id.ICNTL(19) = 2; 1302 ierr = MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);CHKERRQ(ierr); 1303 } 1304 ierr = MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);CHKERRQ(ierr); 1305 } 1306 1307 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 1308 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 1309 1310 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 1311 if (mumps->petsc_size > 1) { 1312 PetscInt lsol_loc; 1313 PetscScalar *sol_loc; 1314 1315 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr); 1316 1317 /* distributed solution; Create x_seq=sol_loc for repeated use */ 1318 if (mumps->x_seq) { 1319 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 1320 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 1321 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 1322 } 1323 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1324 ierr = PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);CHKERRQ(ierr); 1325 mumps->id.lsol_loc = lsol_loc; 1326 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1327 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr); 1328 } 1329 ierr = PetscLogFlops(mumps->id.RINFO(2));CHKERRQ(ierr); 1330 PetscFunctionReturn(0); 1331 } 1332 1333 /* Sets MUMPS options from the options database */ 1334 PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A) 1335 { 1336 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1337 PetscErrorCode ierr; 1338 PetscInt icntl,info[80],i,ninfo=80; 1339 PetscBool flg; 1340 1341 PetscFunctionBegin; 1342 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 1343 ierr = PetscOptionsInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr); 1344 if (flg) mumps->id.ICNTL(1) = icntl; 1345 ierr = PetscOptionsInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr); 1346 if (flg) mumps->id.ICNTL(2) = icntl; 1347 ierr = PetscOptionsInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr); 1348 if (flg) mumps->id.ICNTL(3) = icntl; 1349 1350 ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr); 1351 if (flg) mumps->id.ICNTL(4) = icntl; 1352 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 1353 1354 ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr); 1355 if (flg) mumps->id.ICNTL(6) = icntl; 1356 1357 ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr); 1358 if (flg) { 1359 if (icntl== 1 && mumps->petsc_size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n"); 1360 else mumps->id.ICNTL(7) = icntl; 1361 } 1362 1363 ierr = PetscOptionsInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr); 1364 /* ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL);CHKERRQ(ierr); handled by MatSolveTranspose_MUMPS() */ 1365 ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr); 1366 ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr); 1367 ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr); 1368 ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr); 1369 ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr); 1370 ierr = PetscOptionsInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr); 1371 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 1372 ierr = MatDestroy(&F->schur);CHKERRQ(ierr); 1373 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 1374 } 1375 /* ierr = PetscOptionsInt("-mat_mumps_icntl_20","ICNTL(20): the format (dense or sparse) of the right-hand sides","None",mumps->id.ICNTL(20),&mumps->id.ICNTL(20),NULL);CHKERRQ(ierr); -- sparse rhs is not supported in PETSc API */ 1376 /* ierr = PetscOptionsInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL);CHKERRQ(ierr); we only use distributed solution vector */ 1377 1378 ierr = PetscOptionsInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr); 1379 ierr = PetscOptionsInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr); 1380 ierr = PetscOptionsInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr); 1381 if (mumps->id.ICNTL(24)) { 1382 mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ 1383 } 1384 1385 ierr = PetscOptionsInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr); 1386 ierr = PetscOptionsInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr); 1387 ierr = PetscOptionsInt("-mat_mumps_icntl_27","ICNTL(27): the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr); 1388 ierr = PetscOptionsInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr); 1389 ierr = PetscOptionsInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr); 1390 /* ierr = PetscOptionsInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */ 1391 ierr = PetscOptionsInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr); 1392 /* ierr = PetscOptionsInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr); -- not supported by PETSc API */ 1393 ierr = PetscOptionsInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr); 1394 ierr = PetscOptionsInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr); 1395 ierr = PetscOptionsInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);CHKERRQ(ierr); 1396 ierr = PetscOptionsInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);CHKERRQ(ierr); 1397 1398 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr); 1399 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr); 1400 ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr); 1401 ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr); 1402 ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr); 1403 ierr = PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);CHKERRQ(ierr); 1404 1405 ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, 256, NULL);CHKERRQ(ierr); 1406 1407 ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr); 1408 if (ninfo) { 1409 if (ninfo > 80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %d must <= 80\n",ninfo); 1410 ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr); 1411 mumps->ninfo = ninfo; 1412 for (i=0; i<ninfo; i++) { 1413 if (info[i] < 0 || info[i]>80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %d must between 1 and 80\n",ninfo); 1414 else mumps->info[i] = info[i]; 1415 } 1416 } 1417 1418 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1419 PetscFunctionReturn(0); 1420 } 1421 1422 PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps) 1423 { 1424 PetscErrorCode ierr; 1425 PetscInt nthreads=0; 1426 1427 PetscFunctionBegin; 1428 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1429 ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRQ(ierr); 1430 ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRQ(ierr); /* so that code like "if (!myid)" still works even if mumps_comm is different */ 1431 1432 ierr = PetscOptionsHasName(NULL,NULL,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr); 1433 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1434 ierr = PetscOptionsGetInt(NULL,NULL,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr); 1435 if (mumps->use_petsc_omp_support) { 1436 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1437 ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr); 1438 ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr); 1439 #else 1440 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -mat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual\n"); 1441 #endif 1442 } else { 1443 mumps->omp_comm = PETSC_COMM_SELF; 1444 mumps->mumps_comm = mumps->petsc_comm; 1445 mumps->is_omp_master = PETSC_TRUE; 1446 } 1447 ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRQ(ierr); 1448 1449 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 1450 mumps->id.job = JOB_INIT; 1451 mumps->id.par = 1; /* host participates factorizaton and solve */ 1452 mumps->id.sym = mumps->sym; 1453 1454 PetscMUMPS_c(mumps); 1455 1456 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 1457 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 1458 */ 1459 ierr = MPI_Bcast(mumps->id.icntl,60,MPIU_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 Manual Section 9 */ 1460 ierr = MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); 1461 1462 mumps->scat_rhs = NULL; 1463 mumps->scat_sol = NULL; 1464 1465 /* set PETSc-MUMPS default options - override MUMPS default */ 1466 mumps->id.ICNTL(3) = 0; 1467 mumps->id.ICNTL(4) = 0; 1468 if (mumps->petsc_size == 1) { 1469 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 1470 } else { 1471 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 1472 mumps->id.ICNTL(20) = 0; /* rhs is in dense format */ 1473 mumps->id.ICNTL(21) = 1; /* distributed solution */ 1474 } 1475 1476 /* schur */ 1477 mumps->id.size_schur = 0; 1478 mumps->id.listvar_schur = NULL; 1479 mumps->id.schur = NULL; 1480 mumps->sizeredrhs = 0; 1481 mumps->schur_sol = NULL; 1482 mumps->schur_sizesol = 0; 1483 PetscFunctionReturn(0); 1484 } 1485 1486 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps) 1487 { 1488 PetscErrorCode ierr; 1489 1490 PetscFunctionBegin; 1491 ierr = MPI_Bcast(mumps->id.infog, 80,MPIU_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 manual p82 */ 1492 ierr = MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); 1493 if (mumps->id.INFOG(1) < 0) { 1494 if (A->erroriffailure) { 1495 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1496 } else { 1497 if (mumps->id.INFOG(1) == -6) { 1498 ierr = PetscInfo2(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1499 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 1500 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 1501 ierr = PetscInfo2(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1502 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1503 } else { 1504 ierr = PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1505 F->factorerrortype = MAT_FACTOR_OTHER; 1506 } 1507 } 1508 } 1509 PetscFunctionReturn(0); 1510 } 1511 1512 /* Note Petsc r(=c) permutation is used when mumps->id.ICNTL(7)==1 with centralized assembled matrix input; otherwise r and c are ignored */ 1513 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1514 { 1515 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1516 PetscErrorCode ierr; 1517 Vec b; 1518 IS is_iden; 1519 const PetscInt M = A->rmap->N; 1520 1521 PetscFunctionBegin; 1522 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1523 1524 /* Set MUMPS options from the options database */ 1525 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1526 1527 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1528 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1529 1530 /* analysis phase */ 1531 /*----------------*/ 1532 mumps->id.job = JOB_FACTSYMBOLIC; 1533 mumps->id.n = M; 1534 switch (mumps->id.ICNTL(18)) { 1535 case 0: /* centralized assembled matrix input */ 1536 if (!mumps->myid) { 1537 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1538 if (mumps->id.ICNTL(6)>1) { 1539 mumps->id.a = (MumpsScalar*)mumps->val; 1540 } 1541 if (mumps->id.ICNTL(7) == 1) { /* use user-provide matrix ordering - assuming r = c ordering */ 1542 /* 1543 PetscBool flag; 1544 ierr = ISEqual(r,c,&flag);CHKERRQ(ierr); 1545 if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"row_perm != col_perm"); 1546 ierr = ISView(r,PETSC_VIEWER_STDOUT_SELF); 1547 */ 1548 if (!mumps->myid) { 1549 const PetscInt *idx; 1550 PetscInt i,*perm_in; 1551 1552 ierr = PetscMalloc1(M,&perm_in);CHKERRQ(ierr); 1553 ierr = ISGetIndices(r,&idx);CHKERRQ(ierr); 1554 1555 mumps->id.perm_in = perm_in; 1556 for (i=0; i<M; i++) perm_in[i] = idx[i]+1; /* perm_in[]: start from 1, not 0! */ 1557 ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr); 1558 } 1559 } 1560 } 1561 break; 1562 case 3: /* distributed assembled matrix input (size>1) */ 1563 mumps->id.nz_loc = mumps->nz; 1564 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1565 if (mumps->id.ICNTL(6)>1) { 1566 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1567 } 1568 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1569 if (!mumps->myid) { 1570 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->N,&mumps->b_seq);CHKERRQ(ierr); 1571 ierr = ISCreateStride(PETSC_COMM_SELF,A->rmap->N,0,1,&is_iden);CHKERRQ(ierr); 1572 } else { 1573 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&mumps->b_seq);CHKERRQ(ierr); 1574 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_iden);CHKERRQ(ierr); 1575 } 1576 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1577 ierr = VecScatterCreate(b,is_iden,mumps->b_seq,is_iden,&mumps->scat_rhs);CHKERRQ(ierr); 1578 ierr = ISDestroy(&is_iden);CHKERRQ(ierr); 1579 ierr = VecDestroy(&b);CHKERRQ(ierr); 1580 break; 1581 } 1582 PetscMUMPS_c(mumps); 1583 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1584 1585 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1586 F->ops->solve = MatSolve_MUMPS; 1587 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1588 F->ops->matsolve = MatMatSolve_MUMPS; 1589 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1590 PetscFunctionReturn(0); 1591 } 1592 1593 /* Note the Petsc r and c permutations are ignored */ 1594 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1595 { 1596 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1597 PetscErrorCode ierr; 1598 Vec b; 1599 IS is_iden; 1600 const PetscInt M = A->rmap->N; 1601 1602 PetscFunctionBegin; 1603 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1604 1605 /* Set MUMPS options from the options database */ 1606 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1607 1608 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1609 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1610 1611 /* analysis phase */ 1612 /*----------------*/ 1613 mumps->id.job = JOB_FACTSYMBOLIC; 1614 mumps->id.n = M; 1615 switch (mumps->id.ICNTL(18)) { 1616 case 0: /* centralized assembled matrix input */ 1617 if (!mumps->myid) { 1618 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1619 if (mumps->id.ICNTL(6)>1) { 1620 mumps->id.a = (MumpsScalar*)mumps->val; 1621 } 1622 } 1623 break; 1624 case 3: /* distributed assembled matrix input (size>1) */ 1625 mumps->id.nz_loc = mumps->nz; 1626 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1627 if (mumps->id.ICNTL(6)>1) { 1628 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1629 } 1630 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1631 if (!mumps->myid) { 1632 ierr = VecCreateSeq(PETSC_COMM_SELF,A->cmap->N,&mumps->b_seq);CHKERRQ(ierr); 1633 ierr = ISCreateStride(PETSC_COMM_SELF,A->cmap->N,0,1,&is_iden);CHKERRQ(ierr); 1634 } else { 1635 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&mumps->b_seq);CHKERRQ(ierr); 1636 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_iden);CHKERRQ(ierr); 1637 } 1638 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1639 ierr = VecScatterCreate(b,is_iden,mumps->b_seq,is_iden,&mumps->scat_rhs);CHKERRQ(ierr); 1640 ierr = ISDestroy(&is_iden);CHKERRQ(ierr); 1641 ierr = VecDestroy(&b);CHKERRQ(ierr); 1642 break; 1643 } 1644 PetscMUMPS_c(mumps); 1645 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1646 1647 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1648 F->ops->solve = MatSolve_MUMPS; 1649 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1650 PetscFunctionReturn(0); 1651 } 1652 1653 /* Note the Petsc r permutation and factor info are ignored */ 1654 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info) 1655 { 1656 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1657 PetscErrorCode ierr; 1658 Vec b; 1659 IS is_iden; 1660 const PetscInt M = A->rmap->N; 1661 1662 PetscFunctionBegin; 1663 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1664 1665 /* Set MUMPS options from the options database */ 1666 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1667 1668 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1669 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1670 1671 /* analysis phase */ 1672 /*----------------*/ 1673 mumps->id.job = JOB_FACTSYMBOLIC; 1674 mumps->id.n = M; 1675 switch (mumps->id.ICNTL(18)) { 1676 case 0: /* centralized assembled matrix input */ 1677 if (!mumps->myid) { 1678 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1679 if (mumps->id.ICNTL(6)>1) { 1680 mumps->id.a = (MumpsScalar*)mumps->val; 1681 } 1682 } 1683 break; 1684 case 3: /* distributed assembled matrix input (size>1) */ 1685 mumps->id.nz_loc = mumps->nz; 1686 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1687 if (mumps->id.ICNTL(6)>1) { 1688 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1689 } 1690 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1691 if (!mumps->myid) { 1692 ierr = VecCreateSeq(PETSC_COMM_SELF,A->cmap->N,&mumps->b_seq);CHKERRQ(ierr); 1693 ierr = ISCreateStride(PETSC_COMM_SELF,A->cmap->N,0,1,&is_iden);CHKERRQ(ierr); 1694 } else { 1695 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&mumps->b_seq);CHKERRQ(ierr); 1696 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_iden);CHKERRQ(ierr); 1697 } 1698 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1699 ierr = VecScatterCreate(b,is_iden,mumps->b_seq,is_iden,&mumps->scat_rhs);CHKERRQ(ierr); 1700 ierr = ISDestroy(&is_iden);CHKERRQ(ierr); 1701 ierr = VecDestroy(&b);CHKERRQ(ierr); 1702 break; 1703 } 1704 PetscMUMPS_c(mumps); 1705 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1706 1707 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 1708 F->ops->solve = MatSolve_MUMPS; 1709 F->ops->solvetranspose = MatSolve_MUMPS; 1710 F->ops->matsolve = MatMatSolve_MUMPS; 1711 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1712 #if defined(PETSC_USE_COMPLEX) 1713 F->ops->getinertia = NULL; 1714 #else 1715 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 1716 #endif 1717 PetscFunctionReturn(0); 1718 } 1719 1720 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) 1721 { 1722 PetscErrorCode ierr; 1723 PetscBool iascii; 1724 PetscViewerFormat format; 1725 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1726 1727 PetscFunctionBegin; 1728 /* check if matrix is mumps type */ 1729 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0); 1730 1731 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1732 if (iascii) { 1733 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1734 if (format == PETSC_VIEWER_ASCII_INFO) { 1735 ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr); 1736 ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",mumps->id.sym);CHKERRQ(ierr); 1737 ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",mumps->id.par);CHKERRQ(ierr); 1738 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d \n",mumps->id.ICNTL(1));CHKERRQ(ierr); 1739 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d \n",mumps->id.ICNTL(2));CHKERRQ(ierr); 1740 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d \n",mumps->id.ICNTL(3));CHKERRQ(ierr); 1741 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",mumps->id.ICNTL(4));CHKERRQ(ierr); 1742 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",mumps->id.ICNTL(5));CHKERRQ(ierr); 1743 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",mumps->id.ICNTL(6));CHKERRQ(ierr); 1744 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d \n",mumps->id.ICNTL(7));CHKERRQ(ierr); 1745 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d \n",mumps->id.ICNTL(8));CHKERRQ(ierr); 1746 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",mumps->id.ICNTL(10));CHKERRQ(ierr); 1747 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",mumps->id.ICNTL(11));CHKERRQ(ierr); 1748 if (mumps->id.ICNTL(11)>0) { 1749 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr); 1750 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr); 1751 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6));CHKERRQ(ierr); 1752 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr); 1753 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr); 1754 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr); 1755 } 1756 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",mumps->id.ICNTL(12));CHKERRQ(ierr); 1757 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (efficiency control): %d \n",mumps->id.ICNTL(13));CHKERRQ(ierr); 1758 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",mumps->id.ICNTL(14));CHKERRQ(ierr); 1759 /* ICNTL(15-17) not used */ 1760 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",mumps->id.ICNTL(18));CHKERRQ(ierr); 1761 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d \n",mumps->id.ICNTL(19));CHKERRQ(ierr); 1762 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(20) (rhs sparse pattern): %d \n",mumps->id.ICNTL(20));CHKERRQ(ierr); 1763 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d \n",mumps->id.ICNTL(21));CHKERRQ(ierr); 1764 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d \n",mumps->id.ICNTL(22));CHKERRQ(ierr); 1765 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory can be allocated locally):%d \n",mumps->id.ICNTL(23));CHKERRQ(ierr); 1766 1767 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d \n",mumps->id.ICNTL(24));CHKERRQ(ierr); 1768 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d \n",mumps->id.ICNTL(25));CHKERRQ(ierr); 1769 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for rhs or solution): %d \n",mumps->id.ICNTL(26));CHKERRQ(ierr); 1770 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(27) (experimental parameter): %d \n",mumps->id.ICNTL(27));CHKERRQ(ierr); 1771 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d \n",mumps->id.ICNTL(28));CHKERRQ(ierr); 1772 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d \n",mumps->id.ICNTL(29));CHKERRQ(ierr); 1773 1774 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d \n",mumps->id.ICNTL(30));CHKERRQ(ierr); 1775 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(31) (factors is discarded in the solve phase): %d \n",mumps->id.ICNTL(31));CHKERRQ(ierr); 1776 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d \n",mumps->id.ICNTL(33));CHKERRQ(ierr); 1777 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d \n",mumps->id.ICNTL(35));CHKERRQ(ierr); 1778 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(36) (choice of BLR factorization variant): %d \n",mumps->id.ICNTL(36));CHKERRQ(ierr); 1779 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(38) (estimated compression rate of LU factors): %d \n",mumps->id.ICNTL(38));CHKERRQ(ierr); 1780 1781 ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1));CHKERRQ(ierr); 1782 ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr); 1783 ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3));CHKERRQ(ierr); 1784 ierr = PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4));CHKERRQ(ierr); 1785 ierr = PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5));CHKERRQ(ierr); 1786 ierr = PetscViewerASCIIPrintf(viewer," CNTL(7) (dropping parameter for BLR): %g \n",mumps->id.CNTL(7));CHKERRQ(ierr); 1787 1788 /* infomation local to each processor */ 1789 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr); 1790 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1791 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr); 1792 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1793 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr); 1794 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr); 1795 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1796 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr); 1797 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr); 1798 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1799 1800 ierr = PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr); 1801 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr); 1802 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1803 1804 ierr = PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr); 1805 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr); 1806 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1807 1808 ierr = PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr); 1809 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr); 1810 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1811 1812 if (mumps->ninfo && mumps->ninfo <= 80){ 1813 PetscInt i; 1814 for (i=0; i<mumps->ninfo; i++){ 1815 ierr = PetscViewerASCIIPrintf(viewer, " INFO(%d): \n",mumps->info[i]);CHKERRQ(ierr); 1816 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(mumps->info[i]));CHKERRQ(ierr); 1817 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1818 } 1819 } 1820 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1821 1822 if (!mumps->myid) { /* information from the host */ 1823 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr); 1824 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr); 1825 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr); 1826 ierr = PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr); 1827 1828 ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(3));CHKERRQ(ierr); 1829 ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(4));CHKERRQ(ierr); 1830 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",mumps->id.INFOG(5));CHKERRQ(ierr); 1831 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",mumps->id.INFOG(6));CHKERRQ(ierr); 1832 ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively use after analysis): %d \n",mumps->id.INFOG(7));CHKERRQ(ierr); 1833 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",mumps->id.INFOG(8));CHKERRQ(ierr); 1834 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",mumps->id.INFOG(9));CHKERRQ(ierr); 1835 ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d \n",mumps->id.INFOG(10));CHKERRQ(ierr); 1836 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d \n",mumps->id.INFOG(11));CHKERRQ(ierr); 1837 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",mumps->id.INFOG(12));CHKERRQ(ierr); 1838 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",mumps->id.INFOG(13));CHKERRQ(ierr); 1839 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",mumps->id.INFOG(14));CHKERRQ(ierr); 1840 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",mumps->id.INFOG(15));CHKERRQ(ierr); 1841 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",mumps->id.INFOG(16));CHKERRQ(ierr); 1842 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",mumps->id.INFOG(17));CHKERRQ(ierr); 1843 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",mumps->id.INFOG(18));CHKERRQ(ierr); 1844 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",mumps->id.INFOG(19));CHKERRQ(ierr); 1845 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",mumps->id.INFOG(20));CHKERRQ(ierr); 1846 ierr = PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",mumps->id.INFOG(21));CHKERRQ(ierr); 1847 ierr = PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",mumps->id.INFOG(22));CHKERRQ(ierr); 1848 ierr = PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",mumps->id.INFOG(23));CHKERRQ(ierr); 1849 ierr = PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",mumps->id.INFOG(24));CHKERRQ(ierr); 1850 ierr = PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",mumps->id.INFOG(25));CHKERRQ(ierr); 1851 ierr = PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));CHKERRQ(ierr); 1852 ierr = PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));CHKERRQ(ierr); 1853 ierr = PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));CHKERRQ(ierr); 1854 ierr = PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));CHKERRQ(ierr); 1855 ierr = PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));CHKERRQ(ierr); 1856 ierr = PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));CHKERRQ(ierr); 1857 ierr = PetscViewerASCIIPrintf(viewer," INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));CHKERRQ(ierr); 1858 ierr = PetscViewerASCIIPrintf(viewer," INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(36));CHKERRQ(ierr); 1859 ierr = PetscViewerASCIIPrintf(viewer," INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d \n",mumps->id.INFOG(37));CHKERRQ(ierr); 1860 ierr = PetscViewerASCIIPrintf(viewer," INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(38));CHKERRQ(ierr); 1861 ierr = PetscViewerASCIIPrintf(viewer," INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d \n",mumps->id.INFOG(39));CHKERRQ(ierr); 1862 } 1863 } 1864 } 1865 PetscFunctionReturn(0); 1866 } 1867 1868 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info) 1869 { 1870 Mat_MUMPS *mumps =(Mat_MUMPS*)A->data; 1871 1872 PetscFunctionBegin; 1873 info->block_size = 1.0; 1874 info->nz_allocated = mumps->id.INFOG(20); 1875 info->nz_used = mumps->id.INFOG(20); 1876 info->nz_unneeded = 0.0; 1877 info->assemblies = 0.0; 1878 info->mallocs = 0.0; 1879 info->memory = 0.0; 1880 info->fill_ratio_given = 0; 1881 info->fill_ratio_needed = 0; 1882 info->factor_mallocs = 0; 1883 PetscFunctionReturn(0); 1884 } 1885 1886 /* -------------------------------------------------------------------------------------------*/ 1887 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 1888 { 1889 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1890 const PetscInt *idxs; 1891 PetscInt size,i; 1892 PetscErrorCode ierr; 1893 1894 PetscFunctionBegin; 1895 ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr); 1896 if (mumps->petsc_size > 1) { 1897 PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */ 1898 1899 ls = mumps->myid ? (size ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 1900 ierr = MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);CHKERRQ(ierr); 1901 if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc\n"); 1902 } 1903 if (mumps->id.size_schur != size) { 1904 ierr = PetscFree2(mumps->id.listvar_schur,mumps->id.schur);CHKERRQ(ierr); 1905 mumps->id.size_schur = size; 1906 mumps->id.schur_lld = size; 1907 ierr = PetscMalloc2(size,&mumps->id.listvar_schur,size*size,&mumps->id.schur);CHKERRQ(ierr); 1908 } 1909 1910 /* Schur complement matrix */ 1911 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.size_schur,(PetscScalar*)mumps->id.schur,&F->schur);CHKERRQ(ierr); 1912 if (mumps->sym == 1) { 1913 ierr = MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 1914 } 1915 1916 /* MUMPS expects Fortran style indices */ 1917 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 1918 ierr = PetscArraycpy(mumps->id.listvar_schur,idxs,size);CHKERRQ(ierr); 1919 for (i=0;i<size;i++) mumps->id.listvar_schur[i]++; 1920 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 1921 if (mumps->petsc_size > 1) { 1922 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 1923 } else { 1924 if (F->factortype == MAT_FACTOR_LU) { 1925 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 1926 } else { 1927 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 1928 } 1929 } 1930 /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 1931 mumps->id.ICNTL(26) = -1; 1932 PetscFunctionReturn(0); 1933 } 1934 1935 /* -------------------------------------------------------------------------------------------*/ 1936 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S) 1937 { 1938 Mat St; 1939 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1940 PetscScalar *array; 1941 #if defined(PETSC_USE_COMPLEX) 1942 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 1943 #endif 1944 PetscErrorCode ierr; 1945 1946 PetscFunctionBegin; 1947 if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 1948 ierr = MatCreate(PETSC_COMM_SELF,&St);CHKERRQ(ierr); 1949 ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);CHKERRQ(ierr); 1950 ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr); 1951 ierr = MatSetUp(St);CHKERRQ(ierr); 1952 ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr); 1953 if (!mumps->sym) { /* MUMPS always return a full matrix */ 1954 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1955 PetscInt i,j,N=mumps->id.size_schur; 1956 for (i=0;i<N;i++) { 1957 for (j=0;j<N;j++) { 1958 #if !defined(PETSC_USE_COMPLEX) 1959 PetscScalar val = mumps->id.schur[i*N+j]; 1960 #else 1961 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 1962 #endif 1963 array[j*N+i] = val; 1964 } 1965 } 1966 } else { /* stored by columns */ 1967 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 1968 } 1969 } else { /* either full or lower-triangular (not packed) */ 1970 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 1971 PetscInt i,j,N=mumps->id.size_schur; 1972 for (i=0;i<N;i++) { 1973 for (j=i;j<N;j++) { 1974 #if !defined(PETSC_USE_COMPLEX) 1975 PetscScalar val = mumps->id.schur[i*N+j]; 1976 #else 1977 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 1978 #endif 1979 array[i*N+j] = val; 1980 array[j*N+i] = val; 1981 } 1982 } 1983 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 1984 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 1985 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 1986 PetscInt i,j,N=mumps->id.size_schur; 1987 for (i=0;i<N;i++) { 1988 for (j=0;j<i+1;j++) { 1989 #if !defined(PETSC_USE_COMPLEX) 1990 PetscScalar val = mumps->id.schur[i*N+j]; 1991 #else 1992 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 1993 #endif 1994 array[i*N+j] = val; 1995 array[j*N+i] = val; 1996 } 1997 } 1998 } 1999 } 2000 ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr); 2001 *S = St; 2002 PetscFunctionReturn(0); 2003 } 2004 2005 /* -------------------------------------------------------------------------------------------*/ 2006 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival) 2007 { 2008 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2009 2010 PetscFunctionBegin; 2011 mumps->id.ICNTL(icntl) = ival; 2012 PetscFunctionReturn(0); 2013 } 2014 2015 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival) 2016 { 2017 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2018 2019 PetscFunctionBegin; 2020 *ival = mumps->id.ICNTL(icntl); 2021 PetscFunctionReturn(0); 2022 } 2023 2024 /*@ 2025 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2026 2027 Logically Collective on Mat 2028 2029 Input Parameters: 2030 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2031 . icntl - index of MUMPS parameter array ICNTL() 2032 - ival - value of MUMPS ICNTL(icntl) 2033 2034 Options Database: 2035 . -mat_mumps_icntl_<icntl> <ival> 2036 2037 Level: beginner 2038 2039 References: 2040 . MUMPS Users' Guide 2041 2042 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2043 @*/ 2044 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival) 2045 { 2046 PetscErrorCode ierr; 2047 2048 PetscFunctionBegin; 2049 PetscValidType(F,1); 2050 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2051 PetscValidLogicalCollectiveInt(F,icntl,2); 2052 PetscValidLogicalCollectiveInt(F,ival,3); 2053 ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr); 2054 PetscFunctionReturn(0); 2055 } 2056 2057 /*@ 2058 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2059 2060 Logically Collective on Mat 2061 2062 Input Parameters: 2063 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2064 - icntl - index of MUMPS parameter array ICNTL() 2065 2066 Output Parameter: 2067 . ival - value of MUMPS ICNTL(icntl) 2068 2069 Level: beginner 2070 2071 References: 2072 . MUMPS Users' Guide 2073 2074 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2075 @*/ 2076 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival) 2077 { 2078 PetscErrorCode ierr; 2079 2080 PetscFunctionBegin; 2081 PetscValidType(F,1); 2082 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2083 PetscValidLogicalCollectiveInt(F,icntl,2); 2084 PetscValidIntPointer(ival,3); 2085 ierr = PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2086 PetscFunctionReturn(0); 2087 } 2088 2089 /* -------------------------------------------------------------------------------------------*/ 2090 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val) 2091 { 2092 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2093 2094 PetscFunctionBegin; 2095 mumps->id.CNTL(icntl) = val; 2096 PetscFunctionReturn(0); 2097 } 2098 2099 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val) 2100 { 2101 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2102 2103 PetscFunctionBegin; 2104 *val = mumps->id.CNTL(icntl); 2105 PetscFunctionReturn(0); 2106 } 2107 2108 /*@ 2109 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2110 2111 Logically Collective on Mat 2112 2113 Input Parameters: 2114 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2115 . icntl - index of MUMPS parameter array CNTL() 2116 - val - value of MUMPS CNTL(icntl) 2117 2118 Options Database: 2119 . -mat_mumps_cntl_<icntl> <val> 2120 2121 Level: beginner 2122 2123 References: 2124 . MUMPS Users' Guide 2125 2126 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2127 @*/ 2128 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val) 2129 { 2130 PetscErrorCode ierr; 2131 2132 PetscFunctionBegin; 2133 PetscValidType(F,1); 2134 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2135 PetscValidLogicalCollectiveInt(F,icntl,2); 2136 PetscValidLogicalCollectiveReal(F,val,3); 2137 ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr); 2138 PetscFunctionReturn(0); 2139 } 2140 2141 /*@ 2142 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2143 2144 Logically Collective on Mat 2145 2146 Input Parameters: 2147 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2148 - icntl - index of MUMPS parameter array CNTL() 2149 2150 Output Parameter: 2151 . val - value of MUMPS CNTL(icntl) 2152 2153 Level: beginner 2154 2155 References: 2156 . MUMPS Users' Guide 2157 2158 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2159 @*/ 2160 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val) 2161 { 2162 PetscErrorCode ierr; 2163 2164 PetscFunctionBegin; 2165 PetscValidType(F,1); 2166 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2167 PetscValidLogicalCollectiveInt(F,icntl,2); 2168 PetscValidRealPointer(val,3); 2169 ierr = PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2170 PetscFunctionReturn(0); 2171 } 2172 2173 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info) 2174 { 2175 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2176 2177 PetscFunctionBegin; 2178 *info = mumps->id.INFO(icntl); 2179 PetscFunctionReturn(0); 2180 } 2181 2182 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog) 2183 { 2184 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2185 2186 PetscFunctionBegin; 2187 *infog = mumps->id.INFOG(icntl); 2188 PetscFunctionReturn(0); 2189 } 2190 2191 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo) 2192 { 2193 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2194 2195 PetscFunctionBegin; 2196 *rinfo = mumps->id.RINFO(icntl); 2197 PetscFunctionReturn(0); 2198 } 2199 2200 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog) 2201 { 2202 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2203 2204 PetscFunctionBegin; 2205 *rinfog = mumps->id.RINFOG(icntl); 2206 PetscFunctionReturn(0); 2207 } 2208 2209 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS) 2210 { 2211 PetscErrorCode ierr; 2212 Mat Bt = NULL,Btseq = NULL; 2213 PetscBool flg; 2214 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2215 PetscScalar *aa; 2216 PetscInt spnr,*ia,*ja; 2217 2218 PetscFunctionBegin; 2219 PetscValidIntPointer(spRHS,2); 2220 ierr = PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);CHKERRQ(ierr); 2221 if (flg) { 2222 ierr = MatTransposeGetMat(spRHS,&Bt);CHKERRQ(ierr); 2223 } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be type MATTRANSPOSEMAT matrix"); 2224 2225 ierr = MatMumpsSetIcntl(F,30,1);CHKERRQ(ierr); 2226 2227 if (mumps->petsc_size > 1) { 2228 Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data; 2229 Btseq = b->A; 2230 } else { 2231 Btseq = Bt; 2232 } 2233 2234 if (!mumps->myid) { 2235 ierr = MatSeqAIJGetArray(Btseq,&aa);CHKERRQ(ierr); 2236 ierr = MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2237 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2238 2239 mumps->id.irhs_ptr = ia; 2240 mumps->id.irhs_sparse = ja; 2241 mumps->id.nz_rhs = ia[spnr] - 1; 2242 mumps->id.rhs_sparse = (MumpsScalar*)aa; 2243 } else { 2244 mumps->id.irhs_ptr = NULL; 2245 mumps->id.irhs_sparse = NULL; 2246 mumps->id.nz_rhs = 0; 2247 mumps->id.rhs_sparse = NULL; 2248 } 2249 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2250 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2251 2252 /* solve phase */ 2253 /*-------------*/ 2254 mumps->id.job = JOB_SOLVE; 2255 PetscMUMPS_c(mumps); 2256 if (mumps->id.INFOG(1) < 0) 2257 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 2258 2259 if (!mumps->myid) { 2260 ierr = MatSeqAIJRestoreArray(Btseq,&aa);CHKERRQ(ierr); 2261 ierr = MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2262 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2263 } 2264 PetscFunctionReturn(0); 2265 } 2266 2267 /*@ 2268 MatMumpsGetInverse - Get user-specified set of entries in inverse of A 2269 2270 Logically Collective on Mat 2271 2272 Input Parameters: 2273 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2274 - spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0] 2275 2276 Output Parameter: 2277 . spRHS - requested entries of inverse of A 2278 2279 Level: beginner 2280 2281 References: 2282 . MUMPS Users' Guide 2283 2284 .seealso: MatGetFactor(), MatCreateTranspose() 2285 @*/ 2286 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS) 2287 { 2288 PetscErrorCode ierr; 2289 2290 PetscFunctionBegin; 2291 PetscValidType(F,1); 2292 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2293 ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr); 2294 PetscFunctionReturn(0); 2295 } 2296 2297 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST) 2298 { 2299 PetscErrorCode ierr; 2300 Mat spRHS; 2301 2302 PetscFunctionBegin; 2303 ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr); 2304 ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr); 2305 ierr = MatDestroy(&spRHS);CHKERRQ(ierr); 2306 PetscFunctionReturn(0); 2307 } 2308 2309 /*@ 2310 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T 2311 2312 Logically Collective on Mat 2313 2314 Input Parameters: 2315 + F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface 2316 - spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0] 2317 2318 Output Parameter: 2319 . spRHST - requested entries of inverse of A^T 2320 2321 Level: beginner 2322 2323 References: 2324 . MUMPS Users' Guide 2325 2326 .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse() 2327 @*/ 2328 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST) 2329 { 2330 PetscErrorCode ierr; 2331 PetscBool flg; 2332 2333 PetscFunctionBegin; 2334 PetscValidType(F,1); 2335 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2336 ierr = PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 2337 if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix"); 2338 2339 ierr = PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));CHKERRQ(ierr); 2340 PetscFunctionReturn(0); 2341 } 2342 2343 /*@ 2344 MatMumpsGetInfo - Get MUMPS parameter INFO() 2345 2346 Logically Collective on Mat 2347 2348 Input Parameters: 2349 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2350 - icntl - index of MUMPS parameter array INFO() 2351 2352 Output Parameter: 2353 . ival - value of MUMPS INFO(icntl) 2354 2355 Level: beginner 2356 2357 References: 2358 . MUMPS Users' Guide 2359 2360 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2361 @*/ 2362 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival) 2363 { 2364 PetscErrorCode ierr; 2365 2366 PetscFunctionBegin; 2367 PetscValidType(F,1); 2368 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2369 PetscValidIntPointer(ival,3); 2370 ierr = PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2371 PetscFunctionReturn(0); 2372 } 2373 2374 /*@ 2375 MatMumpsGetInfog - Get MUMPS parameter INFOG() 2376 2377 Logically Collective on Mat 2378 2379 Input Parameters: 2380 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2381 - icntl - index of MUMPS parameter array INFOG() 2382 2383 Output Parameter: 2384 . ival - value of MUMPS INFOG(icntl) 2385 2386 Level: beginner 2387 2388 References: 2389 . MUMPS Users' Guide 2390 2391 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2392 @*/ 2393 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival) 2394 { 2395 PetscErrorCode ierr; 2396 2397 PetscFunctionBegin; 2398 PetscValidType(F,1); 2399 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2400 PetscValidIntPointer(ival,3); 2401 ierr = PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2402 PetscFunctionReturn(0); 2403 } 2404 2405 /*@ 2406 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 2407 2408 Logically Collective on Mat 2409 2410 Input Parameters: 2411 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2412 - icntl - index of MUMPS parameter array RINFO() 2413 2414 Output Parameter: 2415 . val - value of MUMPS RINFO(icntl) 2416 2417 Level: beginner 2418 2419 References: 2420 . MUMPS Users' Guide 2421 2422 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2423 @*/ 2424 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val) 2425 { 2426 PetscErrorCode ierr; 2427 2428 PetscFunctionBegin; 2429 PetscValidType(F,1); 2430 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2431 PetscValidRealPointer(val,3); 2432 ierr = PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2433 PetscFunctionReturn(0); 2434 } 2435 2436 /*@ 2437 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 2438 2439 Logically Collective on Mat 2440 2441 Input Parameters: 2442 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2443 - icntl - index of MUMPS parameter array RINFOG() 2444 2445 Output Parameter: 2446 . val - value of MUMPS RINFOG(icntl) 2447 2448 Level: beginner 2449 2450 References: 2451 . MUMPS Users' Guide 2452 2453 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2454 @*/ 2455 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val) 2456 { 2457 PetscErrorCode ierr; 2458 2459 PetscFunctionBegin; 2460 PetscValidType(F,1); 2461 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2462 PetscValidRealPointer(val,3); 2463 ierr = PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2464 PetscFunctionReturn(0); 2465 } 2466 2467 /*MC 2468 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 2469 distributed and sequential matrices via the external package MUMPS. 2470 2471 Works with MATAIJ and MATSBAIJ matrices 2472 2473 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 2474 2475 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below. 2476 2477 Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver 2478 2479 Options Database Keys: 2480 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 2481 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 2482 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 2483 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 2484 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 2485 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis 2486 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 2487 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 2488 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 2489 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 2490 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 2491 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 2492 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 2493 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 2494 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can allocate per processor 2495 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 2496 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 2497 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix 2498 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering 2499 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 2500 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 2501 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 2502 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 2503 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 2504 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 2505 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 2506 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 2507 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 2508 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 2509 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 2510 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 2511 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 2512 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 2513 Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 2514 2515 Level: beginner 2516 2517 Notes: 2518 MUMPS Cholesky does not handle (complex) Hermitian matrices http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf so using it will error if the matrix is Hermitian. 2519 2520 When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling 2521 $ KSPGetPC(ksp,&pc); 2522 $ PCFactorGetMatrix(pc,&mat); 2523 $ MatMumpsGetInfo(mat,....); 2524 $ MatMumpsGetInfog(mat,....); etc. 2525 Or you can run with -ksp_error_if_not_converged and the program will be stopped and the information printed in the error message. 2526 2527 Two modes to run MUMPS/PETSc with OpenMP 2528 2529 $ Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 2530 $ threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". 2531 2532 $ -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example, 2533 $ if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 2534 2535 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 2536 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc 2537 (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 2538 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 2539 (PETSc will automatically try to utilized a threaded BLAS if --with-openmp is provided). 2540 2541 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 2542 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 2543 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 2544 are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set 2545 by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs. 2546 In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets, 2547 if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind 2548 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half 2549 cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 2550 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 2551 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 2552 examine the mapping result. 2553 2554 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts, 2555 for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc 2556 calls omp_set_num_threads(m) internally before calling MUMPS. 2557 2558 References: 2559 + 1. - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 2560 - 2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017. 2561 2562 .seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCGetFactor(), PCFactorGetMatrix() 2563 2564 M*/ 2565 2566 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type) 2567 { 2568 PetscFunctionBegin; 2569 *type = MATSOLVERMUMPS; 2570 PetscFunctionReturn(0); 2571 } 2572 2573 /* MatGetFactor for Seq and MPI AIJ matrices */ 2574 static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F) 2575 { 2576 Mat B; 2577 PetscErrorCode ierr; 2578 Mat_MUMPS *mumps; 2579 PetscBool isSeqAIJ; 2580 2581 PetscFunctionBegin; 2582 /* Create the factorization matrix */ 2583 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr); 2584 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2585 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2586 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2587 ierr = MatSetUp(B);CHKERRQ(ierr); 2588 2589 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2590 2591 B->ops->view = MatView_MUMPS; 2592 B->ops->getinfo = MatGetInfo_MUMPS; 2593 2594 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2595 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2596 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2597 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2598 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2599 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2600 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2601 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2602 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2603 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2604 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2605 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2606 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2607 2608 if (ftype == MAT_FACTOR_LU) { 2609 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 2610 B->factortype = MAT_FACTOR_LU; 2611 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 2612 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 2613 mumps->sym = 0; 2614 } else { 2615 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 2616 B->factortype = MAT_FACTOR_CHOLESKY; 2617 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 2618 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 2619 #if defined(PETSC_USE_COMPLEX) 2620 mumps->sym = 2; 2621 #else 2622 if (A->spd_set && A->spd) mumps->sym = 1; 2623 else mumps->sym = 2; 2624 #endif 2625 } 2626 2627 /* set solvertype */ 2628 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2629 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2630 2631 B->ops->destroy = MatDestroy_MUMPS; 2632 B->data = (void*)mumps; 2633 2634 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2635 2636 *F = B; 2637 PetscFunctionReturn(0); 2638 } 2639 2640 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 2641 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F) 2642 { 2643 Mat B; 2644 PetscErrorCode ierr; 2645 Mat_MUMPS *mumps; 2646 PetscBool isSeqSBAIJ; 2647 2648 PetscFunctionBegin; 2649 if (ftype != MAT_FACTOR_CHOLESKY) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Cannot use PETSc SBAIJ matrices with MUMPS LU, use AIJ matrix"); 2650 if (A->rmap->bs > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Cannot use PETSc SBAIJ matrices with block size > 1 with MUMPS Cholesky, use AIJ matrix instead"); 2651 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr); 2652 /* Create the factorization matrix */ 2653 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2654 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2655 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2656 ierr = MatSetUp(B);CHKERRQ(ierr); 2657 2658 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2659 if (isSeqSBAIJ) { 2660 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 2661 } else { 2662 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 2663 } 2664 2665 B->ops->getinfo = MatGetInfo_External; 2666 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 2667 B->ops->view = MatView_MUMPS; 2668 2669 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2670 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2671 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2672 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2673 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2674 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2675 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2676 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2677 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2678 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2679 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2680 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2681 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2682 2683 B->factortype = MAT_FACTOR_CHOLESKY; 2684 #if defined(PETSC_USE_COMPLEX) 2685 mumps->sym = 2; 2686 #else 2687 if (A->spd_set && A->spd) mumps->sym = 1; 2688 else mumps->sym = 2; 2689 #endif 2690 2691 /* set solvertype */ 2692 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2693 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2694 2695 B->ops->destroy = MatDestroy_MUMPS; 2696 B->data = (void*)mumps; 2697 2698 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2699 2700 *F = B; 2701 PetscFunctionReturn(0); 2702 } 2703 2704 static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F) 2705 { 2706 Mat B; 2707 PetscErrorCode ierr; 2708 Mat_MUMPS *mumps; 2709 PetscBool isSeqBAIJ; 2710 2711 PetscFunctionBegin; 2712 /* Create the factorization matrix */ 2713 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr); 2714 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2715 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2716 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2717 ierr = MatSetUp(B);CHKERRQ(ierr); 2718 2719 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2720 if (ftype == MAT_FACTOR_LU) { 2721 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 2722 B->factortype = MAT_FACTOR_LU; 2723 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 2724 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 2725 mumps->sym = 0; 2726 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead\n"); 2727 2728 B->ops->getinfo = MatGetInfo_External; 2729 B->ops->view = MatView_MUMPS; 2730 2731 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2732 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2733 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2734 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2735 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2736 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2737 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2738 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2739 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2740 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2741 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2742 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2743 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2744 2745 /* set solvertype */ 2746 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2747 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2748 2749 B->ops->destroy = MatDestroy_MUMPS; 2750 B->data = (void*)mumps; 2751 2752 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2753 2754 *F = B; 2755 PetscFunctionReturn(0); 2756 } 2757 2758 /* MatGetFactor for Seq and MPI SELL matrices */ 2759 static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F) 2760 { 2761 Mat B; 2762 PetscErrorCode ierr; 2763 Mat_MUMPS *mumps; 2764 PetscBool isSeqSELL; 2765 2766 PetscFunctionBegin; 2767 /* Create the factorization matrix */ 2768 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);CHKERRQ(ierr); 2769 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2770 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2771 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2772 ierr = MatSetUp(B);CHKERRQ(ierr); 2773 2774 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2775 2776 B->ops->view = MatView_MUMPS; 2777 B->ops->getinfo = MatGetInfo_MUMPS; 2778 2779 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2780 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2781 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2782 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2783 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2784 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2785 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2786 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2787 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2788 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2789 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2790 2791 if (ftype == MAT_FACTOR_LU) { 2792 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 2793 B->factortype = MAT_FACTOR_LU; 2794 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 2795 else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 2796 mumps->sym = 0; 2797 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 2798 2799 /* set solvertype */ 2800 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2801 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2802 2803 B->ops->destroy = MatDestroy_MUMPS; 2804 B->data = (void*)mumps; 2805 2806 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2807 2808 *F = B; 2809 PetscFunctionReturn(0); 2810 } 2811 2812 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 2813 { 2814 PetscErrorCode ierr; 2815 2816 PetscFunctionBegin; 2817 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2818 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2819 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2820 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2821 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 2822 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2823 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2824 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2825 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2826 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 2827 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);CHKERRQ(ierr); 2828 PetscFunctionReturn(0); 2829 } 2830 2831