1 2 /* 3 Provides an interface to the MUMPS sparse solver 4 */ 5 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 8 #include <../src/mat/impls/sell/mpi/mpisell.h> 9 10 EXTERN_C_BEGIN 11 #if defined(PETSC_USE_COMPLEX) 12 #if defined(PETSC_USE_REAL_SINGLE) 13 #include <cmumps_c.h> 14 #else 15 #include <zmumps_c.h> 16 #endif 17 #else 18 #if defined(PETSC_USE_REAL_SINGLE) 19 #include <smumps_c.h> 20 #else 21 #include <dmumps_c.h> 22 #endif 23 #endif 24 EXTERN_C_END 25 #define JOB_INIT -1 26 #define JOB_FACTSYMBOLIC 1 27 #define JOB_FACTNUMERIC 2 28 #define JOB_SOLVE 3 29 #define JOB_END -2 30 31 /* calls to MUMPS */ 32 #if defined(PETSC_USE_COMPLEX) 33 #if defined(PETSC_USE_REAL_SINGLE) 34 #define MUMPS_c cmumps_c 35 #else 36 #define MUMPS_c zmumps_c 37 #endif 38 #else 39 #if defined(PETSC_USE_REAL_SINGLE) 40 #define MUMPS_c smumps_c 41 #else 42 #define MUMPS_c dmumps_c 43 #endif 44 #endif 45 46 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */ 47 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 48 #define PetscMUMPS_c(mumps) \ 49 do { \ 50 if (mumps->use_petsc_omp_support) { \ 51 if (mumps->is_omp_master) { \ 52 ierr = PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl);CHKERRQ(ierr); \ 53 MUMPS_c(&mumps->id); \ 54 ierr = PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl);CHKERRQ(ierr); \ 55 } \ 56 ierr = PetscOmpCtrlBarrier(mumps->omp_ctrl);CHKERRQ(ierr); \ 57 } else { \ 58 MUMPS_c(&mumps->id); \ 59 } \ 60 } while(0) 61 #else 62 #define PetscMUMPS_c(mumps) \ 63 do { MUMPS_c(&mumps->id); } while (0) 64 #endif 65 66 /* declare MumpsScalar */ 67 #if defined(PETSC_USE_COMPLEX) 68 #if defined(PETSC_USE_REAL_SINGLE) 69 #define MumpsScalar mumps_complex 70 #else 71 #define MumpsScalar mumps_double_complex 72 #endif 73 #else 74 #define MumpsScalar PetscScalar 75 #endif 76 77 /* macros s.t. indices match MUMPS documentation */ 78 #define ICNTL(I) icntl[(I)-1] 79 #define CNTL(I) cntl[(I)-1] 80 #define INFOG(I) infog[(I)-1] 81 #define INFO(I) info[(I)-1] 82 #define RINFOG(I) rinfog[(I)-1] 83 #define RINFO(I) rinfo[(I)-1] 84 85 typedef struct { 86 #if defined(PETSC_USE_COMPLEX) 87 #if defined(PETSC_USE_REAL_SINGLE) 88 CMUMPS_STRUC_C id; 89 #else 90 ZMUMPS_STRUC_C id; 91 #endif 92 #else 93 #if defined(PETSC_USE_REAL_SINGLE) 94 SMUMPS_STRUC_C id; 95 #else 96 DMUMPS_STRUC_C id; 97 #endif 98 #endif 99 100 MatStructure matstruc; 101 PetscMPIInt myid,petsc_size; 102 PetscInt *irn,*jcn,nz,sym; 103 PetscScalar *val; 104 MPI_Comm mumps_comm; 105 PetscInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 106 VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 107 Vec b_seq,x_seq; 108 PetscInt ninfo,*info; /* display INFO */ 109 PetscInt sizeredrhs; 110 PetscScalar *schur_sol; 111 PetscInt schur_sizesol; 112 113 PetscBool use_petsc_omp_support; 114 PetscOmpCtrl omp_ctrl; /* an OpenMP controler that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 115 MPI_Comm petsc_comm,omp_comm; /* petsc_comm is petsc matrix's comm */ 116 PetscMPIInt mpinz; /* on master rank, nz = sum(mpinz) over omp_comm; on other ranks, mpinz = nz*/ 117 PetscMPIInt omp_comm_size; 118 PetscBool is_omp_master; /* is this rank the master of omp_comm */ 119 PetscMPIInt *recvcount,*displs; 120 121 PetscErrorCode (*ConvertToTriples)(Mat, int, MatReuse, int*, int**, int**, PetscScalar**); 122 } Mat_MUMPS; 123 124 extern PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*); 125 126 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps) 127 { 128 PetscErrorCode ierr; 129 130 PetscFunctionBegin; 131 ierr = PetscFree2(mumps->id.listvar_schur,mumps->id.schur);CHKERRQ(ierr); 132 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 133 ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr); 134 mumps->id.size_schur = 0; 135 mumps->id.schur_lld = 0; 136 mumps->id.ICNTL(19) = 0; 137 PetscFunctionReturn(0); 138 } 139 140 /* solve with rhs in mumps->id.redrhs and return in the same location */ 141 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 142 { 143 Mat_MUMPS *mumps=(Mat_MUMPS*)F->data; 144 Mat S,B,X; 145 MatFactorSchurStatus schurstatus; 146 PetscInt sizesol; 147 PetscErrorCode ierr; 148 149 PetscFunctionBegin; 150 ierr = MatFactorFactorizeSchurComplement(F);CHKERRQ(ierr); 151 ierr = MatFactorGetSchurComplement(F,&S,&schurstatus);CHKERRQ(ierr); 152 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);CHKERRQ(ierr); 153 switch (schurstatus) { 154 case MAT_FACTOR_SCHUR_FACTORED: 155 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);CHKERRQ(ierr); 156 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 157 ierr = MatMatSolveTranspose(S,B,X);CHKERRQ(ierr); 158 } else { 159 ierr = MatMatSolve(S,B,X);CHKERRQ(ierr); 160 } 161 break; 162 case MAT_FACTOR_SCHUR_INVERTED: 163 sizesol = mumps->id.nrhs*mumps->id.size_schur; 164 if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 165 ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr); 166 ierr = PetscMalloc1(sizesol,&mumps->schur_sol);CHKERRQ(ierr); 167 mumps->schur_sizesol = sizesol; 168 } 169 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);CHKERRQ(ierr); 170 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 171 ierr = MatTransposeMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr); 172 } else { 173 ierr = MatMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr); 174 } 175 ierr = MatCopy(X,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 176 break; 177 default: 178 SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %D",F->schur_status); 179 break; 180 } 181 ierr = MatFactorRestoreSchurComplement(F,&S,schurstatus);CHKERRQ(ierr); 182 ierr = MatDestroy(&B);CHKERRQ(ierr); 183 ierr = MatDestroy(&X);CHKERRQ(ierr); 184 PetscFunctionReturn(0); 185 } 186 187 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 188 { 189 Mat_MUMPS *mumps=(Mat_MUMPS*)F->data; 190 PetscErrorCode ierr; 191 192 PetscFunctionBegin; 193 if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 194 PetscFunctionReturn(0); 195 } 196 if (!expansion) { /* prepare for the condensation step */ 197 PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur; 198 /* allocate MUMPS internal array to store reduced right-hand sides */ 199 if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 200 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 201 mumps->id.lredrhs = mumps->id.size_schur; 202 ierr = PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);CHKERRQ(ierr); 203 mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs; 204 } 205 mumps->id.ICNTL(26) = 1; /* condensation phase */ 206 } else { /* prepare for the expansion step */ 207 /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */ 208 ierr = MatMumpsSolveSchur_Private(F);CHKERRQ(ierr); 209 mumps->id.ICNTL(26) = 2; /* expansion phase */ 210 PetscMUMPS_c(mumps); 211 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 212 /* restore defaults */ 213 mumps->id.ICNTL(26) = -1; 214 /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 215 if (mumps->id.nrhs > 1) { 216 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 217 mumps->id.lredrhs = 0; 218 mumps->sizeredrhs = 0; 219 } 220 } 221 PetscFunctionReturn(0); 222 } 223 224 /* 225 MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz] 226 227 input: 228 A - matrix in aij,baij or sbaij format 229 shift - 0: C style output triple; 1: Fortran style output triple. 230 reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 231 MAT_REUSE_MATRIX: only the values in v array are updated 232 output: 233 nnz - dim of r, c, and v (number of local nonzero entries of A) 234 r, c, v - row and col index, matrix values (matrix triples) 235 236 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 237 freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 238 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 239 240 */ 241 242 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 243 { 244 const PetscInt *ai,*aj,*ajj,M=A->rmap->n; 245 PetscInt nz,rnz,i,j; 246 PetscErrorCode ierr; 247 PetscInt *row,*col; 248 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 249 250 PetscFunctionBegin; 251 *v=aa->a; 252 if (reuse == MAT_INITIAL_MATRIX) { 253 nz = aa->nz; 254 ai = aa->i; 255 aj = aa->j; 256 *nnz = nz; 257 ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr); 258 col = row + nz; 259 260 nz = 0; 261 for (i=0; i<M; i++) { 262 rnz = ai[i+1] - ai[i]; 263 ajj = aj + ai[i]; 264 for (j=0; j<rnz; j++) { 265 row[nz] = i+shift; col[nz++] = ajj[j] + shift; 266 } 267 } 268 *r = row; *c = col; 269 } 270 PetscFunctionReturn(0); 271 } 272 273 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 274 { 275 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 276 PetscInt *ptr; 277 278 PetscFunctionBegin; 279 *v = a->val; 280 if (reuse == MAT_INITIAL_MATRIX) { 281 PetscInt nz,i,j,row; 282 PetscErrorCode ierr; 283 284 nz = a->sliidx[a->totalslices]; 285 *nnz = nz; 286 ierr = PetscMalloc1(2*nz, &ptr);CHKERRQ(ierr); 287 *r = ptr; 288 *c = ptr + nz; 289 290 for (i=0; i<a->totalslices; i++) { 291 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 292 *ptr++ = 8*i + row + shift; 293 } 294 } 295 for (i=0;i<nz;i++) *ptr++ = a->colidx[i] + shift; 296 } 297 PetscFunctionReturn(0); 298 } 299 300 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 301 { 302 Mat_SeqBAIJ *aa=(Mat_SeqBAIJ*)A->data; 303 const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2; 304 PetscInt bs,M,nz,idx=0,rnz,i,j,k,m; 305 PetscErrorCode ierr; 306 PetscInt *row,*col; 307 308 PetscFunctionBegin; 309 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 310 M = A->rmap->N/bs; 311 *v = aa->a; 312 if (reuse == MAT_INITIAL_MATRIX) { 313 ai = aa->i; aj = aa->j; 314 nz = bs2*aa->nz; 315 *nnz = nz; 316 ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr); 317 col = row + nz; 318 319 for (i=0; i<M; i++) { 320 ajj = aj + ai[i]; 321 rnz = ai[i+1] - ai[i]; 322 for (k=0; k<rnz; k++) { 323 for (j=0; j<bs; j++) { 324 for (m=0; m<bs; m++) { 325 row[idx] = i*bs + m + shift; 326 col[idx++] = bs*(ajj[k]) + j + shift; 327 } 328 } 329 } 330 } 331 *r = row; *c = col; 332 } 333 PetscFunctionReturn(0); 334 } 335 336 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r,int **c,PetscScalar **v) 337 { 338 const PetscInt *ai, *aj,*ajj; 339 PetscInt nz,rnz,i,j,k,m,bs; 340 PetscErrorCode ierr; 341 PetscInt *row,*col; 342 PetscScalar *val; 343 Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)A->data; 344 const PetscInt bs2=aa->bs2,mbs=aa->mbs; 345 #if defined(PETSC_USE_COMPLEX) 346 PetscBool hermitian; 347 #endif 348 349 PetscFunctionBegin; 350 #if defined(PETSC_USE_COMPLEX) 351 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 352 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 353 #endif 354 ai = aa->i; 355 aj = aa->j; 356 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 357 if (reuse == MAT_INITIAL_MATRIX) { 358 nz = aa->nz; 359 ierr = PetscMalloc((2*bs2*nz*sizeof(PetscInt)+(bs>1?bs2*nz*sizeof(PetscScalar):0)), &row);CHKERRQ(ierr); 360 col = row + bs2*nz; 361 if (bs>1) val = (PetscScalar*)(col + bs2*nz); 362 else val = aa->a; 363 364 *r = row; *c = col; *v = val; 365 } else { 366 if (bs == 1) *v = aa->a; 367 row = *r; col = *c; val = *v; 368 } 369 370 nz = 0; 371 if (bs>1) { 372 for (i=0; i<mbs; i++) { 373 rnz = ai[i+1] - ai[i]; 374 ajj = aj + ai[i]; 375 for (j=0; j<rnz; j++) { 376 for (k=0; k<bs; k++) { 377 for (m=0; m<bs; m++) { 378 if (ajj[j]>i || k>=m) { 379 if (reuse == MAT_INITIAL_MATRIX) { 380 row[nz] = i*bs + m + shift; 381 col[nz] = ajj[j]*bs + k + shift; 382 } 383 val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs]; 384 } 385 } 386 } 387 } 388 } 389 } else if (reuse == MAT_INITIAL_MATRIX) { 390 for (i=0; i<mbs; i++) { 391 rnz = ai[i+1] - ai[i]; 392 ajj = aj + ai[i]; 393 for (j=0; j<rnz; j++) { 394 row[nz] = i+shift; col[nz++] = ajj[j] + shift; 395 } 396 } 397 if (nz != aa->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %D != %D",nz,aa->nz); 398 } 399 if (reuse == MAT_INITIAL_MATRIX) *nnz = nz; 400 PetscFunctionReturn(0); 401 } 402 403 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 404 { 405 const PetscInt *ai,*aj,*ajj,*adiag,M=A->rmap->n; 406 PetscInt nz,rnz,i,j; 407 const PetscScalar *av,*v1; 408 PetscScalar *val; 409 PetscErrorCode ierr; 410 PetscInt *row,*col; 411 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 412 PetscBool missing; 413 #if defined(PETSC_USE_COMPLEX) 414 PetscBool hermitian; 415 #endif 416 417 PetscFunctionBegin; 418 #if defined(PETSC_USE_COMPLEX) 419 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 420 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 421 #endif 422 ai = aa->i; aj = aa->j; av = aa->a; 423 adiag = aa->diag; 424 ierr = MatMissingDiagonal_SeqAIJ(A,&missing,&i);CHKERRQ(ierr); 425 if (reuse == MAT_INITIAL_MATRIX) { 426 /* count nz in the upper triangular part of A */ 427 nz = 0; 428 if (missing) { 429 for (i=0; i<M; i++) { 430 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 431 for (j=ai[i];j<ai[i+1];j++) { 432 if (aj[j] < i) continue; 433 nz++; 434 } 435 } else { 436 nz += ai[i+1] - adiag[i]; 437 } 438 } 439 } else { 440 for (i=0; i<M; i++) nz += ai[i+1] - adiag[i]; 441 } 442 *nnz = nz; 443 444 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 445 col = row + nz; 446 val = (PetscScalar*)(col + nz); 447 448 nz = 0; 449 if (missing) { 450 for (i=0; i<M; i++) { 451 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 452 for (j=ai[i];j<ai[i+1];j++) { 453 if (aj[j] < i) continue; 454 row[nz] = i+shift; 455 col[nz] = aj[j]+shift; 456 val[nz] = av[j]; 457 nz++; 458 } 459 } else { 460 rnz = ai[i+1] - adiag[i]; 461 ajj = aj + adiag[i]; 462 v1 = av + adiag[i]; 463 for (j=0; j<rnz; j++) { 464 row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j]; 465 } 466 } 467 } 468 } else { 469 for (i=0; i<M; i++) { 470 rnz = ai[i+1] - adiag[i]; 471 ajj = aj + adiag[i]; 472 v1 = av + adiag[i]; 473 for (j=0; j<rnz; j++) { 474 row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j]; 475 } 476 } 477 } 478 *r = row; *c = col; *v = val; 479 } else { 480 nz = 0; val = *v; 481 if (missing) { 482 for (i=0; i <M; i++) { 483 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 484 for (j=ai[i];j<ai[i+1];j++) { 485 if (aj[j] < i) continue; 486 val[nz++] = av[j]; 487 } 488 } else { 489 rnz = ai[i+1] - adiag[i]; 490 v1 = av + adiag[i]; 491 for (j=0; j<rnz; j++) { 492 val[nz++] = v1[j]; 493 } 494 } 495 } 496 } else { 497 for (i=0; i <M; i++) { 498 rnz = ai[i+1] - adiag[i]; 499 v1 = av + adiag[i]; 500 for (j=0; j<rnz; j++) { 501 val[nz++] = v1[j]; 502 } 503 } 504 } 505 } 506 PetscFunctionReturn(0); 507 } 508 509 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r,int **c, PetscScalar **v) 510 { 511 const PetscInt *ai,*aj,*bi,*bj,*garray,*ajj,*bjj; 512 PetscErrorCode ierr; 513 PetscInt rstart,nz,bs,i,j,k,m,jj,irow,countA,countB; 514 PetscInt *row,*col; 515 const PetscScalar *av,*bv,*v1,*v2; 516 PetscScalar *val; 517 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data; 518 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)(mat->A)->data; 519 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 520 const PetscInt bs2=aa->bs2,mbs=aa->mbs; 521 #if defined(PETSC_USE_COMPLEX) 522 PetscBool hermitian; 523 #endif 524 525 PetscFunctionBegin; 526 #if defined(PETSC_USE_COMPLEX) 527 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 528 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 529 #endif 530 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 531 rstart = A->rmap->rstart; 532 ai = aa->i; 533 aj = aa->j; 534 bi = bb->i; 535 bj = bb->j; 536 av = aa->a; 537 bv = bb->a; 538 539 garray = mat->garray; 540 541 if (reuse == MAT_INITIAL_MATRIX) { 542 nz = aa->nz + bb->nz; 543 ierr = PetscMalloc((2*bs2*nz*sizeof(PetscInt)+bs2*nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 544 col = row + bs2*nz; 545 val = (PetscScalar*)(col + bs2*nz); 546 547 *r = row; *c = col; *v = val; 548 } else { 549 row = *r; col = *c; val = *v; 550 } 551 552 jj = 0; irow = rstart; 553 for (i=0; i<mbs; i++) { 554 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 555 countA = ai[i+1] - ai[i]; 556 countB = bi[i+1] - bi[i]; 557 bjj = bj + bi[i]; 558 v1 = av + ai[i]*bs2; 559 v2 = bv + bi[i]*bs2; 560 561 if (bs>1) { 562 /* A-part */ 563 for (j=0; j<countA; j++) { 564 for (k=0; k<bs; k++) { 565 for (m=0; m<bs; m++) { 566 if (rstart + ajj[j]*bs>irow || k>=m) { 567 if (reuse == MAT_INITIAL_MATRIX) { 568 row[jj] = irow + m + shift; col[jj] = rstart + ajj[j]*bs + k + shift; 569 } 570 val[jj++] = v1[j*bs2 + m + k*bs]; 571 } 572 } 573 } 574 } 575 576 /* B-part */ 577 for (j=0; j < countB; j++) { 578 for (k=0; k<bs; k++) { 579 for (m=0; m<bs; m++) { 580 if (reuse == MAT_INITIAL_MATRIX) { 581 row[jj] = irow + m + shift; col[jj] = garray[bjj[j]]*bs + k + shift; 582 } 583 val[jj++] = v2[j*bs2 + m + k*bs]; 584 } 585 } 586 } 587 } else { 588 /* A-part */ 589 for (j=0; j<countA; j++) { 590 if (reuse == MAT_INITIAL_MATRIX) { 591 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 592 } 593 val[jj++] = v1[j]; 594 } 595 596 /* B-part */ 597 for (j=0; j < countB; j++) { 598 if (reuse == MAT_INITIAL_MATRIX) { 599 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 600 } 601 val[jj++] = v2[j]; 602 } 603 } 604 irow+=bs; 605 } 606 *nnz = jj; 607 PetscFunctionReturn(0); 608 } 609 610 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 611 { 612 const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 613 PetscErrorCode ierr; 614 PetscInt rstart,nz,i,j,jj,irow,countA,countB; 615 PetscInt *row,*col; 616 const PetscScalar *av, *bv,*v1,*v2; 617 PetscScalar *val; 618 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 619 Mat_SeqAIJ *aa = (Mat_SeqAIJ*)(mat->A)->data; 620 Mat_SeqAIJ *bb = (Mat_SeqAIJ*)(mat->B)->data; 621 622 PetscFunctionBegin; 623 rstart = A->rmap->rstart; 624 ai = aa->i; 625 aj = aa->j; 626 bi = bb->i; 627 bj = bb->j; 628 av = aa->a; 629 bv = bb->a; 630 631 garray = mat->garray; 632 633 if (reuse == MAT_INITIAL_MATRIX) { 634 nz = aa->nz + bb->nz; 635 *nnz = nz; 636 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 637 col = row + nz; 638 val = (PetscScalar*)(col + nz); 639 640 *r = row; *c = col; *v = val; 641 } else { 642 row = *r; col = *c; val = *v; 643 } 644 645 jj = 0; irow = rstart; 646 for (i=0; i<m; i++) { 647 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 648 countA = ai[i+1] - ai[i]; 649 countB = bi[i+1] - bi[i]; 650 bjj = bj + bi[i]; 651 v1 = av + ai[i]; 652 v2 = bv + bi[i]; 653 654 /* A-part */ 655 for (j=0; j<countA; j++) { 656 if (reuse == MAT_INITIAL_MATRIX) { 657 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 658 } 659 val[jj++] = v1[j]; 660 } 661 662 /* B-part */ 663 for (j=0; j < countB; j++) { 664 if (reuse == MAT_INITIAL_MATRIX) { 665 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 666 } 667 val[jj++] = v2[j]; 668 } 669 irow++; 670 } 671 PetscFunctionReturn(0); 672 } 673 674 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 675 { 676 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)A->data; 677 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ*)(mat->A)->data; 678 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 679 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj; 680 const PetscInt *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart; 681 const PetscInt bs2=mat->bs2; 682 PetscErrorCode ierr; 683 PetscInt bs,nz,i,j,k,n,jj,irow,countA,countB,idx; 684 PetscInt *row,*col; 685 const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2; 686 PetscScalar *val; 687 688 PetscFunctionBegin; 689 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 690 if (reuse == MAT_INITIAL_MATRIX) { 691 nz = bs2*(aa->nz + bb->nz); 692 *nnz = nz; 693 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 694 col = row + nz; 695 val = (PetscScalar*)(col + nz); 696 697 *r = row; *c = col; *v = val; 698 } else { 699 row = *r; col = *c; val = *v; 700 } 701 702 jj = 0; irow = rstart; 703 for (i=0; i<mbs; i++) { 704 countA = ai[i+1] - ai[i]; 705 countB = bi[i+1] - bi[i]; 706 ajj = aj + ai[i]; 707 bjj = bj + bi[i]; 708 v1 = av + bs2*ai[i]; 709 v2 = bv + bs2*bi[i]; 710 711 idx = 0; 712 /* A-part */ 713 for (k=0; k<countA; k++) { 714 for (j=0; j<bs; j++) { 715 for (n=0; n<bs; n++) { 716 if (reuse == MAT_INITIAL_MATRIX) { 717 row[jj] = irow + n + shift; 718 col[jj] = rstart + bs*ajj[k] + j + shift; 719 } 720 val[jj++] = v1[idx++]; 721 } 722 } 723 } 724 725 idx = 0; 726 /* B-part */ 727 for (k=0; k<countB; k++) { 728 for (j=0; j<bs; j++) { 729 for (n=0; n<bs; n++) { 730 if (reuse == MAT_INITIAL_MATRIX) { 731 row[jj] = irow + n + shift; 732 col[jj] = bs*garray[bjj[k]] + j + shift; 733 } 734 val[jj++] = v2[idx++]; 735 } 736 } 737 } 738 irow += bs; 739 } 740 PetscFunctionReturn(0); 741 } 742 743 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 744 { 745 const PetscInt *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 746 PetscErrorCode ierr; 747 PetscInt rstart,nz,nza,nzb,i,j,jj,irow,countA,countB; 748 PetscInt *row,*col; 749 const PetscScalar *av, *bv,*v1,*v2; 750 PetscScalar *val; 751 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 752 Mat_SeqAIJ *aa =(Mat_SeqAIJ*)(mat->A)->data; 753 Mat_SeqAIJ *bb =(Mat_SeqAIJ*)(mat->B)->data; 754 #if defined(PETSC_USE_COMPLEX) 755 PetscBool hermitian; 756 #endif 757 758 PetscFunctionBegin; 759 #if defined(PETSC_USE_COMPLEX) 760 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 761 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 762 #endif 763 ai = aa->i; 764 aj = aa->j; 765 adiag = aa->diag; 766 bi = bb->i; 767 bj = bb->j; 768 garray = mat->garray; 769 av = aa->a; 770 bv = bb->a; 771 772 rstart = A->rmap->rstart; 773 774 if (reuse == MAT_INITIAL_MATRIX) { 775 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 776 nzb = 0; /* num of upper triangular entries in mat->B */ 777 for (i=0; i<m; i++) { 778 nza += (ai[i+1] - adiag[i]); 779 countB = bi[i+1] - bi[i]; 780 bjj = bj + bi[i]; 781 for (j=0; j<countB; j++) { 782 if (garray[bjj[j]] > rstart) nzb++; 783 } 784 } 785 786 nz = nza + nzb; /* total nz of upper triangular part of mat */ 787 *nnz = nz; 788 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 789 col = row + nz; 790 val = (PetscScalar*)(col + nz); 791 792 *r = row; *c = col; *v = val; 793 } else { 794 row = *r; col = *c; val = *v; 795 } 796 797 jj = 0; irow = rstart; 798 for (i=0; i<m; i++) { 799 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 800 v1 = av + adiag[i]; 801 countA = ai[i+1] - adiag[i]; 802 countB = bi[i+1] - bi[i]; 803 bjj = bj + bi[i]; 804 v2 = bv + bi[i]; 805 806 /* A-part */ 807 for (j=0; j<countA; j++) { 808 if (reuse == MAT_INITIAL_MATRIX) { 809 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 810 } 811 val[jj++] = v1[j]; 812 } 813 814 /* B-part */ 815 for (j=0; j < countB; j++) { 816 if (garray[bjj[j]] > rstart) { 817 if (reuse == MAT_INITIAL_MATRIX) { 818 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 819 } 820 val[jj++] = v2[j]; 821 } 822 } 823 irow++; 824 } 825 PetscFunctionReturn(0); 826 } 827 828 PetscErrorCode MatDestroy_MUMPS(Mat A) 829 { 830 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 831 PetscErrorCode ierr; 832 833 PetscFunctionBegin; 834 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 835 ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr); 836 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 837 ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr); 838 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 839 ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr); 840 ierr = PetscFree(mumps->irn);CHKERRQ(ierr); 841 ierr = PetscFree(mumps->info);CHKERRQ(ierr); 842 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 843 mumps->id.job = JOB_END; 844 PetscMUMPS_c(mumps); 845 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 846 if (mumps->use_petsc_omp_support) { ierr = PetscOmpCtrlDestroy(&mumps->omp_ctrl);CHKERRQ(ierr); } 847 #endif 848 ierr = PetscFree2(mumps->recvcount,mumps->displs);CHKERRQ(ierr); 849 ierr = PetscFree(A->data);CHKERRQ(ierr); 850 851 /* clear composed functions */ 852 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr); 853 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr); 854 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr); 855 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr); 856 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);CHKERRQ(ierr); 857 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr); 858 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);CHKERRQ(ierr); 859 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);CHKERRQ(ierr); 860 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);CHKERRQ(ierr); 861 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);CHKERRQ(ierr); 862 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);CHKERRQ(ierr); 863 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);CHKERRQ(ierr); 864 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);CHKERRQ(ierr); 865 PetscFunctionReturn(0); 866 } 867 868 PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x) 869 { 870 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 871 PetscScalar *array; 872 Vec b_seq; 873 IS is_iden,is_petsc; 874 PetscErrorCode ierr; 875 PetscInt i; 876 PetscBool second_solve = PETSC_FALSE; 877 static PetscBool cite1 = PETSC_FALSE,cite2 = PETSC_FALSE; 878 879 PetscFunctionBegin; 880 ierr = PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1);CHKERRQ(ierr); 881 ierr = PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",&cite2);CHKERRQ(ierr); 882 883 if (A->factorerrortype) { 884 ierr = PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 885 ierr = VecSetInf(x);CHKERRQ(ierr); 886 PetscFunctionReturn(0); 887 } 888 889 mumps->id.ICNTL(20)= 0; /* dense RHS */ 890 mumps->id.nrhs = 1; 891 b_seq = mumps->b_seq; 892 if (mumps->petsc_size > 1) { 893 /* MUMPS only supports centralized rhs. Scatter b into a seqential rhs vector */ 894 ierr = VecScatterBegin(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 895 ierr = VecScatterEnd(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 896 if (!mumps->myid) {ierr = VecGetArray(b_seq,&array);CHKERRQ(ierr);} 897 } else { /* petsc_size == 1 */ 898 ierr = VecCopy(b,x);CHKERRQ(ierr); 899 ierr = VecGetArray(x,&array);CHKERRQ(ierr); 900 } 901 if (!mumps->myid) { /* define rhs on the host */ 902 mumps->id.nrhs = 1; 903 mumps->id.rhs = (MumpsScalar*)array; 904 } 905 906 /* 907 handle condensation step of Schur complement (if any) 908 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 909 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 910 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 911 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 912 */ 913 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 914 if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n"); 915 second_solve = PETSC_TRUE; 916 ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr); 917 } 918 /* solve phase */ 919 /*-------------*/ 920 mumps->id.job = JOB_SOLVE; 921 PetscMUMPS_c(mumps); 922 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 923 924 /* handle expansion step of Schur complement (if any) */ 925 if (second_solve) { 926 ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr); 927 } 928 929 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 930 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 931 /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 932 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 933 } 934 if (!mumps->scat_sol) { /* create scatter scat_sol */ 935 ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */ 936 for (i=0; i<mumps->id.lsol_loc; i++) { 937 mumps->id.isol_loc[i] -= 1; /* change Fortran style to C style */ 938 } 939 ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,mumps->id.isol_loc,PETSC_COPY_VALUES,&is_petsc);CHKERRQ(ierr); /* to */ 940 ierr = VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr); 941 ierr = ISDestroy(&is_iden);CHKERRQ(ierr); 942 ierr = ISDestroy(&is_petsc);CHKERRQ(ierr); 943 944 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 945 } 946 947 ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 948 ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 949 } 950 951 if (mumps->petsc_size > 1) {if (!mumps->myid) {ierr = VecRestoreArray(b_seq,&array);CHKERRQ(ierr);}} 952 else {ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);} 953 954 ierr = PetscLogFlops(2.0*mumps->id.RINFO(3));CHKERRQ(ierr); 955 PetscFunctionReturn(0); 956 } 957 958 PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x) 959 { 960 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 961 PetscErrorCode ierr; 962 963 PetscFunctionBegin; 964 mumps->id.ICNTL(9) = 0; 965 ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr); 966 mumps->id.ICNTL(9) = 1; 967 PetscFunctionReturn(0); 968 } 969 970 PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X) 971 { 972 PetscErrorCode ierr; 973 Mat Bt = NULL; 974 PetscBool flg, flgT; 975 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 976 PetscInt i,nrhs,M; 977 PetscScalar *array; 978 const PetscScalar *rbray; 979 PetscInt lsol_loc,nlsol_loc,*isol_loc,*idxx,*isol_loc_save,iidx = 0; 980 PetscScalar *bray,*sol_loc,*sol_loc_save; 981 IS is_to,is_from; 982 PetscInt k,proc,j,m,myrstart; 983 const PetscInt *rstart; 984 Vec v_mpi,b_seq,msol_loc; 985 VecScatter scat_rhs,scat_sol; 986 PetscScalar *aa; 987 PetscInt spnr,*ia,*ja; 988 Mat_MPIAIJ *b = NULL; 989 990 PetscFunctionBegin; 991 ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 992 if (!flg) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix"); 993 994 ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 995 if (flg) { /* dense B */ 996 if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution"); 997 mumps->id.ICNTL(20)= 0; /* dense RHS */ 998 } else { /* sparse B */ 999 ierr = PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);CHKERRQ(ierr); 1000 if (flgT) { /* input B is transpose of actural RHS matrix, 1001 because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */ 1002 ierr = MatTransposeGetMat(B,&Bt);CHKERRQ(ierr); 1003 } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix"); 1004 mumps->id.ICNTL(20)= 1; /* sparse RHS */ 1005 } 1006 1007 ierr = MatGetSize(B,&M,&nrhs);CHKERRQ(ierr); 1008 mumps->id.nrhs = nrhs; 1009 mumps->id.lrhs = M; 1010 mumps->id.rhs = NULL; 1011 1012 if (mumps->petsc_size == 1) { 1013 PetscScalar *aa; 1014 PetscInt spnr,*ia,*ja; 1015 PetscBool second_solve = PETSC_FALSE; 1016 1017 ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr); 1018 mumps->id.rhs = (MumpsScalar*)array; 1019 1020 if (!Bt) { /* dense B */ 1021 /* copy B to X */ 1022 ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr); 1023 ierr = PetscArraycpy(array,rbray,M*nrhs);CHKERRQ(ierr); 1024 ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr); 1025 } else { /* sparse B */ 1026 ierr = MatSeqAIJGetArray(Bt,&aa);CHKERRQ(ierr); 1027 ierr = MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1028 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 1029 /* mumps requires ia and ja start at 1! */ 1030 mumps->id.irhs_ptr = ia; 1031 mumps->id.irhs_sparse = ja; 1032 mumps->id.nz_rhs = ia[spnr] - 1; 1033 mumps->id.rhs_sparse = (MumpsScalar*)aa; 1034 } 1035 /* handle condensation step of Schur complement (if any) */ 1036 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1037 second_solve = PETSC_TRUE; 1038 ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr); 1039 } 1040 /* solve phase */ 1041 /*-------------*/ 1042 mumps->id.job = JOB_SOLVE; 1043 PetscMUMPS_c(mumps); 1044 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1045 1046 /* handle expansion step of Schur complement (if any) */ 1047 if (second_solve) { 1048 ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr); 1049 } 1050 if (Bt) { /* sparse B */ 1051 ierr = MatSeqAIJRestoreArray(Bt,&aa);CHKERRQ(ierr); 1052 ierr = MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1053 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 1054 } 1055 ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr); 1056 PetscFunctionReturn(0); 1057 } 1058 1059 /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/ 1060 if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n"); 1061 1062 /* create msol_loc to hold mumps local solution */ 1063 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1064 sol_loc_save = (PetscScalar*)mumps->id.sol_loc; 1065 1066 lsol_loc = mumps->id.lsol_loc; 1067 nlsol_loc = nrhs*lsol_loc; /* length of sol_loc */ 1068 ierr = PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);CHKERRQ(ierr); 1069 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1070 mumps->id.isol_loc = isol_loc; 1071 1072 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);CHKERRQ(ierr); 1073 1074 if (!Bt) { /* dense B */ 1075 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1076 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1077 0, re-arrange B into desired order, which is a local operation. 1078 */ 1079 1080 /* scatter v_mpi to b_seq because MUMPS only supports centralized rhs */ 1081 /* wrap dense rhs matrix B into a vector v_mpi */ 1082 ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr); 1083 ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr); 1084 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr); 1085 ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr); 1086 1087 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */ 1088 if (!mumps->myid) { 1089 PetscInt *idx; 1090 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1091 ierr = PetscMalloc1(nrhs*M,&idx);CHKERRQ(ierr); 1092 ierr = MatGetOwnershipRanges(B,&rstart);CHKERRQ(ierr); 1093 k = 0; 1094 for (proc=0; proc<mumps->petsc_size; proc++){ 1095 for (j=0; j<nrhs; j++){ 1096 for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i; 1097 } 1098 } 1099 1100 ierr = VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);CHKERRQ(ierr); 1101 ierr = ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr); 1102 ierr = ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);CHKERRQ(ierr); 1103 } else { 1104 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);CHKERRQ(ierr); 1105 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);CHKERRQ(ierr); 1106 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);CHKERRQ(ierr); 1107 } 1108 ierr = VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);CHKERRQ(ierr); 1109 ierr = VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1110 ierr = ISDestroy(&is_to);CHKERRQ(ierr); 1111 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1112 ierr = VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1113 1114 if (!mumps->myid) { /* define rhs on the host */ 1115 ierr = VecGetArray(b_seq,&bray);CHKERRQ(ierr); 1116 mumps->id.rhs = (MumpsScalar*)bray; 1117 ierr = VecRestoreArray(b_seq,&bray);CHKERRQ(ierr); 1118 } 1119 1120 } else { /* sparse B */ 1121 b = (Mat_MPIAIJ*)Bt->data; 1122 1123 /* wrap dense X into a vector v_mpi */ 1124 ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr); 1125 ierr = MatDenseGetArray(X,&bray);CHKERRQ(ierr); 1126 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr); 1127 ierr = MatDenseRestoreArray(X,&bray);CHKERRQ(ierr); 1128 1129 if (!mumps->myid) { 1130 ierr = MatSeqAIJGetArray(b->A,&aa);CHKERRQ(ierr); 1131 ierr = MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1132 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 1133 /* mumps requires ia and ja start at 1! */ 1134 mumps->id.irhs_ptr = ia; 1135 mumps->id.irhs_sparse = ja; 1136 mumps->id.nz_rhs = ia[spnr] - 1; 1137 mumps->id.rhs_sparse = (MumpsScalar*)aa; 1138 } else { 1139 mumps->id.irhs_ptr = NULL; 1140 mumps->id.irhs_sparse = NULL; 1141 mumps->id.nz_rhs = 0; 1142 mumps->id.rhs_sparse = NULL; 1143 } 1144 } 1145 1146 /* solve phase */ 1147 /*-------------*/ 1148 mumps->id.job = JOB_SOLVE; 1149 PetscMUMPS_c(mumps); 1150 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1151 1152 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1153 ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr); 1154 ierr = VecPlaceArray(v_mpi,array);CHKERRQ(ierr); 1155 1156 /* create scatter scat_sol */ 1157 ierr = MatGetOwnershipRanges(X,&rstart);CHKERRQ(ierr); 1158 /* iidx: index for scatter mumps solution to petsc X */ 1159 1160 ierr = ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);CHKERRQ(ierr); 1161 ierr = PetscMalloc1(nlsol_loc,&idxx);CHKERRQ(ierr); 1162 for (i=0; i<lsol_loc; i++) { 1163 isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1164 1165 for (proc=0; proc<mumps->petsc_size; proc++){ 1166 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) { 1167 myrstart = rstart[proc]; 1168 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1169 iidx = k + myrstart*nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1170 m = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */ 1171 break; 1172 } 1173 } 1174 1175 for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m; 1176 } 1177 ierr = ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr); 1178 ierr = VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);CHKERRQ(ierr); 1179 ierr = VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1180 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1181 ierr = ISDestroy(&is_to);CHKERRQ(ierr); 1182 ierr = VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1183 ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr); 1184 1185 /* free spaces */ 1186 mumps->id.sol_loc = (MumpsScalar*)sol_loc_save; 1187 mumps->id.isol_loc = isol_loc_save; 1188 1189 ierr = PetscFree2(sol_loc,isol_loc);CHKERRQ(ierr); 1190 ierr = PetscFree(idxx);CHKERRQ(ierr); 1191 ierr = VecDestroy(&msol_loc);CHKERRQ(ierr); 1192 ierr = VecDestroy(&v_mpi);CHKERRQ(ierr); 1193 if (Bt) { 1194 if (!mumps->myid) { 1195 b = (Mat_MPIAIJ*)Bt->data; 1196 ierr = MatSeqAIJRestoreArray(b->A,&aa);CHKERRQ(ierr); 1197 ierr = MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1198 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 1199 } 1200 } else { 1201 ierr = VecDestroy(&b_seq);CHKERRQ(ierr); 1202 ierr = VecScatterDestroy(&scat_rhs);CHKERRQ(ierr); 1203 } 1204 ierr = VecScatterDestroy(&scat_sol);CHKERRQ(ierr); 1205 ierr = PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));CHKERRQ(ierr); 1206 PetscFunctionReturn(0); 1207 } 1208 1209 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X) 1210 { 1211 PetscErrorCode ierr; 1212 PetscBool flg; 1213 Mat B; 1214 1215 PetscFunctionBegin; 1216 ierr = PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 1217 if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix"); 1218 1219 /* Create B=Bt^T that uses Bt's data structure */ 1220 ierr = MatCreateTranspose(Bt,&B);CHKERRQ(ierr); 1221 1222 ierr = MatMatSolve_MUMPS(A,B,X);CHKERRQ(ierr); 1223 ierr = MatDestroy(&B);CHKERRQ(ierr); 1224 PetscFunctionReturn(0); 1225 } 1226 1227 #if !defined(PETSC_USE_COMPLEX) 1228 /* 1229 input: 1230 F: numeric factor 1231 output: 1232 nneg: total number of negative pivots 1233 nzero: total number of zero pivots 1234 npos: (global dimension of F) - nneg - nzero 1235 */ 1236 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos) 1237 { 1238 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1239 PetscErrorCode ierr; 1240 PetscMPIInt size; 1241 1242 PetscFunctionBegin; 1243 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRQ(ierr); 1244 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1245 if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",mumps->id.INFOG(13)); 1246 1247 if (nneg) *nneg = mumps->id.INFOG(12); 1248 if (nzero || npos) { 1249 if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1250 if (nzero) *nzero = mumps->id.INFOG(28); 1251 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1252 } 1253 PetscFunctionReturn(0); 1254 } 1255 #endif 1256 1257 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps) 1258 { 1259 PetscErrorCode ierr; 1260 PetscInt i,nz=0,*irn,*jcn=0; 1261 PetscScalar *val=0; 1262 PetscMPIInt mpinz,*recvcount=NULL,*displs=NULL; 1263 1264 PetscFunctionBegin; 1265 if (mumps->omp_comm_size > 1) { 1266 if (reuse == MAT_INITIAL_MATRIX) { 1267 /* master first gathers counts of nonzeros to receive */ 1268 if (mumps->is_omp_master) { ierr = PetscMalloc2(mumps->omp_comm_size,&recvcount,mumps->omp_comm_size,&displs);CHKERRQ(ierr); } 1269 ierr = PetscMPIIntCast(mumps->nz,&mpinz);CHKERRQ(ierr); 1270 ierr = MPI_Gather(&mpinz,1,MPI_INT,recvcount,1,MPI_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1271 1272 /* master allocates memory to receive nonzeros */ 1273 if (mumps->is_omp_master) { 1274 displs[0] = 0; 1275 for (i=1; i<mumps->omp_comm_size; i++) displs[i] = displs[i-1] + recvcount[i-1]; 1276 nz = displs[mumps->omp_comm_size-1] + recvcount[mumps->omp_comm_size-1]; 1277 ierr = PetscMalloc(2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar),&irn);CHKERRQ(ierr); 1278 jcn = irn + nz; 1279 val = (PetscScalar*)(jcn + nz); 1280 } 1281 1282 /* save the gatherv plan */ 1283 mumps->mpinz = mpinz; /* used as send count */ 1284 mumps->recvcount = recvcount; 1285 mumps->displs = displs; 1286 1287 /* master gathers nonzeros */ 1288 ierr = MPI_Gatherv(mumps->irn,mpinz,MPIU_INT,irn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1289 ierr = MPI_Gatherv(mumps->jcn,mpinz,MPIU_INT,jcn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1290 ierr = MPI_Gatherv(mumps->val,mpinz,MPIU_SCALAR,val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1291 1292 /* master frees its row/col/val and replaces them with bigger arrays */ 1293 if (mumps->is_omp_master) { 1294 ierr = PetscFree(mumps->irn);CHKERRQ(ierr); /* irn/jcn/val are allocated together so free only irn */ 1295 mumps->nz = nz; /* it is a sum of mpinz over omp_comm */ 1296 mumps->irn = irn; 1297 mumps->jcn = jcn; 1298 mumps->val = val; 1299 } 1300 } else { 1301 ierr = MPI_Gatherv((mumps->is_omp_master?MPI_IN_PLACE:mumps->val),mumps->mpinz,MPIU_SCALAR,mumps->val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1302 } 1303 } 1304 PetscFunctionReturn(0); 1305 } 1306 1307 PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info) 1308 { 1309 Mat_MUMPS *mumps =(Mat_MUMPS*)(F)->data; 1310 PetscErrorCode ierr; 1311 PetscBool isMPIAIJ; 1312 1313 PetscFunctionBegin; 1314 if (mumps->id.INFOG(1) < 0) { 1315 if (mumps->id.INFOG(1) == -6) { 1316 ierr = PetscInfo2(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1317 } 1318 ierr = PetscInfo2(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1319 PetscFunctionReturn(0); 1320 } 1321 1322 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1323 ierr = MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);CHKERRQ(ierr); 1324 1325 /* numerical factorization phase */ 1326 /*-------------------------------*/ 1327 mumps->id.job = JOB_FACTNUMERIC; 1328 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1329 if (!mumps->myid) { 1330 mumps->id.a = (MumpsScalar*)mumps->val; 1331 } 1332 } else { 1333 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1334 } 1335 PetscMUMPS_c(mumps); 1336 if (mumps->id.INFOG(1) < 0) { 1337 if (A->erroriffailure) { 1338 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 1339 } else { 1340 if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */ 1341 ierr = PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1342 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1343 } else if (mumps->id.INFOG(1) == -13) { 1344 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1345 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1346 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10) ) { 1347 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray \n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1348 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1349 } else { 1350 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1351 F->factorerrortype = MAT_FACTOR_OTHER; 1352 } 1353 } 1354 } 1355 if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d\n",mumps->id.INFOG(16)); 1356 1357 F->assembled = PETSC_TRUE; 1358 mumps->matstruc = SAME_NONZERO_PATTERN; 1359 if (F->schur) { /* reset Schur status to unfactored */ 1360 #if defined(PETSC_HAVE_CUDA) 1361 F->schur->valid_GPU_matrix = PETSC_OFFLOAD_CPU; 1362 #endif 1363 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1364 mumps->id.ICNTL(19) = 2; 1365 ierr = MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);CHKERRQ(ierr); 1366 } 1367 ierr = MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);CHKERRQ(ierr); 1368 } 1369 1370 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 1371 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 1372 1373 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 1374 if (mumps->petsc_size > 1) { 1375 PetscInt lsol_loc; 1376 PetscScalar *sol_loc; 1377 1378 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr); 1379 1380 /* distributed solution; Create x_seq=sol_loc for repeated use */ 1381 if (mumps->x_seq) { 1382 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 1383 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 1384 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 1385 } 1386 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1387 ierr = PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);CHKERRQ(ierr); 1388 mumps->id.lsol_loc = lsol_loc; 1389 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1390 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr); 1391 } 1392 ierr = PetscLogFlops(mumps->id.RINFO(2));CHKERRQ(ierr); 1393 PetscFunctionReturn(0); 1394 } 1395 1396 /* Sets MUMPS options from the options database */ 1397 PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A) 1398 { 1399 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1400 PetscErrorCode ierr; 1401 PetscInt icntl,info[80],i,ninfo=80; 1402 PetscBool flg; 1403 1404 PetscFunctionBegin; 1405 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 1406 ierr = PetscOptionsInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr); 1407 if (flg) mumps->id.ICNTL(1) = icntl; 1408 ierr = PetscOptionsInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr); 1409 if (flg) mumps->id.ICNTL(2) = icntl; 1410 ierr = PetscOptionsInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr); 1411 if (flg) mumps->id.ICNTL(3) = icntl; 1412 1413 ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr); 1414 if (flg) mumps->id.ICNTL(4) = icntl; 1415 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 1416 1417 ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr); 1418 if (flg) mumps->id.ICNTL(6) = icntl; 1419 1420 ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr); 1421 if (flg) { 1422 if (icntl== 1 && mumps->petsc_size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n"); 1423 else mumps->id.ICNTL(7) = icntl; 1424 } 1425 1426 ierr = PetscOptionsInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr); 1427 /* ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL);CHKERRQ(ierr); handled by MatSolveTranspose_MUMPS() */ 1428 ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr); 1429 ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr); 1430 ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr); 1431 ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr); 1432 ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr); 1433 ierr = PetscOptionsInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr); 1434 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 1435 ierr = MatDestroy(&F->schur);CHKERRQ(ierr); 1436 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 1437 } 1438 /* ierr = PetscOptionsInt("-mat_mumps_icntl_20","ICNTL(20): the format (dense or sparse) of the right-hand sides","None",mumps->id.ICNTL(20),&mumps->id.ICNTL(20),NULL);CHKERRQ(ierr); -- sparse rhs is not supported in PETSc API */ 1439 /* ierr = PetscOptionsInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL);CHKERRQ(ierr); we only use distributed solution vector */ 1440 1441 ierr = PetscOptionsInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr); 1442 ierr = PetscOptionsInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr); 1443 ierr = PetscOptionsInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr); 1444 if (mumps->id.ICNTL(24)) { 1445 mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ 1446 } 1447 1448 ierr = PetscOptionsInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr); 1449 ierr = PetscOptionsInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr); 1450 ierr = PetscOptionsInt("-mat_mumps_icntl_27","ICNTL(27): the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr); 1451 ierr = PetscOptionsInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr); 1452 ierr = PetscOptionsInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr); 1453 /* ierr = PetscOptionsInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */ 1454 ierr = PetscOptionsInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr); 1455 /* ierr = PetscOptionsInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr); -- not supported by PETSc API */ 1456 ierr = PetscOptionsInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr); 1457 ierr = PetscOptionsInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr); 1458 ierr = PetscOptionsInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);CHKERRQ(ierr); 1459 ierr = PetscOptionsInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);CHKERRQ(ierr); 1460 1461 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr); 1462 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr); 1463 ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr); 1464 ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr); 1465 ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr); 1466 ierr = PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);CHKERRQ(ierr); 1467 1468 ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, 256, NULL);CHKERRQ(ierr); 1469 1470 ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr); 1471 if (ninfo) { 1472 if (ninfo > 80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %d must <= 80\n",ninfo); 1473 ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr); 1474 mumps->ninfo = ninfo; 1475 for (i=0; i<ninfo; i++) { 1476 if (info[i] < 0 || info[i]>80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %d must between 1 and 80\n",ninfo); 1477 else mumps->info[i] = info[i]; 1478 } 1479 } 1480 1481 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1482 PetscFunctionReturn(0); 1483 } 1484 1485 PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps) 1486 { 1487 PetscErrorCode ierr; 1488 PetscInt nthreads=0; 1489 1490 PetscFunctionBegin; 1491 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1492 ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRQ(ierr); 1493 ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRQ(ierr); /* so that code like "if (!myid)" still works even if mumps_comm is different */ 1494 1495 ierr = PetscOptionsHasName(NULL,NULL,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr); 1496 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1497 ierr = PetscOptionsGetInt(NULL,NULL,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr); 1498 if (mumps->use_petsc_omp_support) { 1499 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1500 ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr); 1501 ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr); 1502 #else 1503 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -mat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual\n"); 1504 #endif 1505 } else { 1506 mumps->omp_comm = PETSC_COMM_SELF; 1507 mumps->mumps_comm = mumps->petsc_comm; 1508 mumps->is_omp_master = PETSC_TRUE; 1509 } 1510 ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRQ(ierr); 1511 1512 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 1513 mumps->id.job = JOB_INIT; 1514 mumps->id.par = 1; /* host participates factorizaton and solve */ 1515 mumps->id.sym = mumps->sym; 1516 1517 PetscMUMPS_c(mumps); 1518 1519 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 1520 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 1521 */ 1522 ierr = MPI_Bcast(mumps->id.icntl,60,MPIU_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 Manual Section 9 */ 1523 ierr = MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); 1524 1525 mumps->scat_rhs = NULL; 1526 mumps->scat_sol = NULL; 1527 1528 /* set PETSc-MUMPS default options - override MUMPS default */ 1529 mumps->id.ICNTL(3) = 0; 1530 mumps->id.ICNTL(4) = 0; 1531 if (mumps->petsc_size == 1) { 1532 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 1533 } else { 1534 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 1535 mumps->id.ICNTL(20) = 0; /* rhs is in dense format */ 1536 mumps->id.ICNTL(21) = 1; /* distributed solution */ 1537 } 1538 1539 /* schur */ 1540 mumps->id.size_schur = 0; 1541 mumps->id.listvar_schur = NULL; 1542 mumps->id.schur = NULL; 1543 mumps->sizeredrhs = 0; 1544 mumps->schur_sol = NULL; 1545 mumps->schur_sizesol = 0; 1546 PetscFunctionReturn(0); 1547 } 1548 1549 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps) 1550 { 1551 PetscErrorCode ierr; 1552 1553 PetscFunctionBegin; 1554 ierr = MPI_Bcast(mumps->id.infog, 80,MPIU_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 manual p82 */ 1555 ierr = MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); 1556 if (mumps->id.INFOG(1) < 0) { 1557 if (A->erroriffailure) { 1558 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1559 } else { 1560 if (mumps->id.INFOG(1) == -6) { 1561 ierr = PetscInfo2(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1562 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 1563 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 1564 ierr = PetscInfo2(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1565 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1566 } else { 1567 ierr = PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1568 F->factorerrortype = MAT_FACTOR_OTHER; 1569 } 1570 } 1571 } 1572 PetscFunctionReturn(0); 1573 } 1574 1575 /* Note Petsc r(=c) permutation is used when mumps->id.ICNTL(7)==1 with centralized assembled matrix input; otherwise r and c are ignored */ 1576 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1577 { 1578 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1579 PetscErrorCode ierr; 1580 Vec b; 1581 const PetscInt M = A->rmap->N; 1582 1583 PetscFunctionBegin; 1584 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1585 1586 /* Set MUMPS options from the options database */ 1587 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1588 1589 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1590 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1591 1592 /* analysis phase */ 1593 /*----------------*/ 1594 mumps->id.job = JOB_FACTSYMBOLIC; 1595 mumps->id.n = M; 1596 switch (mumps->id.ICNTL(18)) { 1597 case 0: /* centralized assembled matrix input */ 1598 if (!mumps->myid) { 1599 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1600 if (mumps->id.ICNTL(6)>1) { 1601 mumps->id.a = (MumpsScalar*)mumps->val; 1602 } 1603 if (mumps->id.ICNTL(7) == 1) { /* use user-provide matrix ordering - assuming r = c ordering */ 1604 /* 1605 PetscBool flag; 1606 ierr = ISEqual(r,c,&flag);CHKERRQ(ierr); 1607 if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"row_perm != col_perm"); 1608 ierr = ISView(r,PETSC_VIEWER_STDOUT_SELF); 1609 */ 1610 if (!mumps->myid) { 1611 const PetscInt *idx; 1612 PetscInt i,*perm_in; 1613 1614 ierr = PetscMalloc1(M,&perm_in);CHKERRQ(ierr); 1615 ierr = ISGetIndices(r,&idx);CHKERRQ(ierr); 1616 1617 mumps->id.perm_in = perm_in; 1618 for (i=0; i<M; i++) perm_in[i] = idx[i]+1; /* perm_in[]: start from 1, not 0! */ 1619 ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr); 1620 } 1621 } 1622 } 1623 break; 1624 case 3: /* distributed assembled matrix input (size>1) */ 1625 mumps->id.nz_loc = mumps->nz; 1626 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1627 if (mumps->id.ICNTL(6)>1) { 1628 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1629 } 1630 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1631 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1632 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1633 ierr = VecDestroy(&b);CHKERRQ(ierr); 1634 break; 1635 } 1636 PetscMUMPS_c(mumps); 1637 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1638 1639 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1640 F->ops->solve = MatSolve_MUMPS; 1641 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1642 F->ops->matsolve = MatMatSolve_MUMPS; 1643 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1644 PetscFunctionReturn(0); 1645 } 1646 1647 /* Note the Petsc r and c permutations are ignored */ 1648 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1649 { 1650 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1651 PetscErrorCode ierr; 1652 Vec b; 1653 const PetscInt M = A->rmap->N; 1654 1655 PetscFunctionBegin; 1656 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1657 1658 /* Set MUMPS options from the options database */ 1659 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1660 1661 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1662 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1663 1664 /* analysis phase */ 1665 /*----------------*/ 1666 mumps->id.job = JOB_FACTSYMBOLIC; 1667 mumps->id.n = M; 1668 switch (mumps->id.ICNTL(18)) { 1669 case 0: /* centralized assembled matrix input */ 1670 if (!mumps->myid) { 1671 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1672 if (mumps->id.ICNTL(6)>1) { 1673 mumps->id.a = (MumpsScalar*)mumps->val; 1674 } 1675 } 1676 break; 1677 case 3: /* distributed assembled matrix input (size>1) */ 1678 mumps->id.nz_loc = mumps->nz; 1679 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1680 if (mumps->id.ICNTL(6)>1) { 1681 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1682 } 1683 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1684 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1685 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1686 ierr = VecDestroy(&b);CHKERRQ(ierr); 1687 break; 1688 } 1689 PetscMUMPS_c(mumps); 1690 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1691 1692 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1693 F->ops->solve = MatSolve_MUMPS; 1694 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1695 PetscFunctionReturn(0); 1696 } 1697 1698 /* Note the Petsc r permutation and factor info are ignored */ 1699 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info) 1700 { 1701 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1702 PetscErrorCode ierr; 1703 Vec b; 1704 const PetscInt M = A->rmap->N; 1705 1706 PetscFunctionBegin; 1707 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1708 1709 /* Set MUMPS options from the options database */ 1710 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1711 1712 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1713 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1714 1715 /* analysis phase */ 1716 /*----------------*/ 1717 mumps->id.job = JOB_FACTSYMBOLIC; 1718 mumps->id.n = M; 1719 switch (mumps->id.ICNTL(18)) { 1720 case 0: /* centralized assembled matrix input */ 1721 if (!mumps->myid) { 1722 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1723 if (mumps->id.ICNTL(6)>1) { 1724 mumps->id.a = (MumpsScalar*)mumps->val; 1725 } 1726 } 1727 break; 1728 case 3: /* distributed assembled matrix input (size>1) */ 1729 mumps->id.nz_loc = mumps->nz; 1730 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1731 if (mumps->id.ICNTL(6)>1) { 1732 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1733 } 1734 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1735 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1736 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1737 ierr = VecDestroy(&b);CHKERRQ(ierr); 1738 break; 1739 } 1740 PetscMUMPS_c(mumps); 1741 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1742 1743 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 1744 F->ops->solve = MatSolve_MUMPS; 1745 F->ops->solvetranspose = MatSolve_MUMPS; 1746 F->ops->matsolve = MatMatSolve_MUMPS; 1747 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1748 #if defined(PETSC_USE_COMPLEX) 1749 F->ops->getinertia = NULL; 1750 #else 1751 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 1752 #endif 1753 PetscFunctionReturn(0); 1754 } 1755 1756 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) 1757 { 1758 PetscErrorCode ierr; 1759 PetscBool iascii; 1760 PetscViewerFormat format; 1761 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1762 1763 PetscFunctionBegin; 1764 /* check if matrix is mumps type */ 1765 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0); 1766 1767 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1768 if (iascii) { 1769 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1770 if (format == PETSC_VIEWER_ASCII_INFO) { 1771 ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr); 1772 ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",mumps->id.sym);CHKERRQ(ierr); 1773 ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",mumps->id.par);CHKERRQ(ierr); 1774 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d \n",mumps->id.ICNTL(1));CHKERRQ(ierr); 1775 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d \n",mumps->id.ICNTL(2));CHKERRQ(ierr); 1776 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d \n",mumps->id.ICNTL(3));CHKERRQ(ierr); 1777 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",mumps->id.ICNTL(4));CHKERRQ(ierr); 1778 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",mumps->id.ICNTL(5));CHKERRQ(ierr); 1779 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",mumps->id.ICNTL(6));CHKERRQ(ierr); 1780 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d \n",mumps->id.ICNTL(7));CHKERRQ(ierr); 1781 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d \n",mumps->id.ICNTL(8));CHKERRQ(ierr); 1782 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",mumps->id.ICNTL(10));CHKERRQ(ierr); 1783 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",mumps->id.ICNTL(11));CHKERRQ(ierr); 1784 if (mumps->id.ICNTL(11)>0) { 1785 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr); 1786 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr); 1787 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6));CHKERRQ(ierr); 1788 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr); 1789 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr); 1790 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr); 1791 } 1792 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",mumps->id.ICNTL(12));CHKERRQ(ierr); 1793 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (efficiency control): %d \n",mumps->id.ICNTL(13));CHKERRQ(ierr); 1794 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",mumps->id.ICNTL(14));CHKERRQ(ierr); 1795 /* ICNTL(15-17) not used */ 1796 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",mumps->id.ICNTL(18));CHKERRQ(ierr); 1797 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d \n",mumps->id.ICNTL(19));CHKERRQ(ierr); 1798 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(20) (rhs sparse pattern): %d \n",mumps->id.ICNTL(20));CHKERRQ(ierr); 1799 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d \n",mumps->id.ICNTL(21));CHKERRQ(ierr); 1800 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d \n",mumps->id.ICNTL(22));CHKERRQ(ierr); 1801 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory can be allocated locally):%d \n",mumps->id.ICNTL(23));CHKERRQ(ierr); 1802 1803 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d \n",mumps->id.ICNTL(24));CHKERRQ(ierr); 1804 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d \n",mumps->id.ICNTL(25));CHKERRQ(ierr); 1805 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for rhs or solution): %d \n",mumps->id.ICNTL(26));CHKERRQ(ierr); 1806 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(27) (experimental parameter): %d \n",mumps->id.ICNTL(27));CHKERRQ(ierr); 1807 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d \n",mumps->id.ICNTL(28));CHKERRQ(ierr); 1808 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d \n",mumps->id.ICNTL(29));CHKERRQ(ierr); 1809 1810 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d \n",mumps->id.ICNTL(30));CHKERRQ(ierr); 1811 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(31) (factors is discarded in the solve phase): %d \n",mumps->id.ICNTL(31));CHKERRQ(ierr); 1812 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d \n",mumps->id.ICNTL(33));CHKERRQ(ierr); 1813 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d \n",mumps->id.ICNTL(35));CHKERRQ(ierr); 1814 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(36) (choice of BLR factorization variant): %d \n",mumps->id.ICNTL(36));CHKERRQ(ierr); 1815 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(38) (estimated compression rate of LU factors): %d \n",mumps->id.ICNTL(38));CHKERRQ(ierr); 1816 1817 ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1));CHKERRQ(ierr); 1818 ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr); 1819 ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3));CHKERRQ(ierr); 1820 ierr = PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4));CHKERRQ(ierr); 1821 ierr = PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5));CHKERRQ(ierr); 1822 ierr = PetscViewerASCIIPrintf(viewer," CNTL(7) (dropping parameter for BLR): %g \n",mumps->id.CNTL(7));CHKERRQ(ierr); 1823 1824 /* infomation local to each processor */ 1825 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr); 1826 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1827 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr); 1828 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1829 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr); 1830 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr); 1831 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1832 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr); 1833 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr); 1834 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1835 1836 ierr = PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr); 1837 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr); 1838 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1839 1840 ierr = PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr); 1841 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr); 1842 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1843 1844 ierr = PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr); 1845 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr); 1846 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1847 1848 if (mumps->ninfo && mumps->ninfo <= 80){ 1849 PetscInt i; 1850 for (i=0; i<mumps->ninfo; i++){ 1851 ierr = PetscViewerASCIIPrintf(viewer, " INFO(%d): \n",mumps->info[i]);CHKERRQ(ierr); 1852 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(mumps->info[i]));CHKERRQ(ierr); 1853 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1854 } 1855 } 1856 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1857 1858 if (!mumps->myid) { /* information from the host */ 1859 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr); 1860 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr); 1861 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr); 1862 ierr = PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr); 1863 1864 ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(3));CHKERRQ(ierr); 1865 ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(4));CHKERRQ(ierr); 1866 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",mumps->id.INFOG(5));CHKERRQ(ierr); 1867 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",mumps->id.INFOG(6));CHKERRQ(ierr); 1868 ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively use after analysis): %d \n",mumps->id.INFOG(7));CHKERRQ(ierr); 1869 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",mumps->id.INFOG(8));CHKERRQ(ierr); 1870 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",mumps->id.INFOG(9));CHKERRQ(ierr); 1871 ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d \n",mumps->id.INFOG(10));CHKERRQ(ierr); 1872 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d \n",mumps->id.INFOG(11));CHKERRQ(ierr); 1873 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",mumps->id.INFOG(12));CHKERRQ(ierr); 1874 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",mumps->id.INFOG(13));CHKERRQ(ierr); 1875 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",mumps->id.INFOG(14));CHKERRQ(ierr); 1876 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",mumps->id.INFOG(15));CHKERRQ(ierr); 1877 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",mumps->id.INFOG(16));CHKERRQ(ierr); 1878 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",mumps->id.INFOG(17));CHKERRQ(ierr); 1879 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",mumps->id.INFOG(18));CHKERRQ(ierr); 1880 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",mumps->id.INFOG(19));CHKERRQ(ierr); 1881 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",mumps->id.INFOG(20));CHKERRQ(ierr); 1882 ierr = PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",mumps->id.INFOG(21));CHKERRQ(ierr); 1883 ierr = PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",mumps->id.INFOG(22));CHKERRQ(ierr); 1884 ierr = PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",mumps->id.INFOG(23));CHKERRQ(ierr); 1885 ierr = PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",mumps->id.INFOG(24));CHKERRQ(ierr); 1886 ierr = PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",mumps->id.INFOG(25));CHKERRQ(ierr); 1887 ierr = PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));CHKERRQ(ierr); 1888 ierr = PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));CHKERRQ(ierr); 1889 ierr = PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));CHKERRQ(ierr); 1890 ierr = PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));CHKERRQ(ierr); 1891 ierr = PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));CHKERRQ(ierr); 1892 ierr = PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));CHKERRQ(ierr); 1893 ierr = PetscViewerASCIIPrintf(viewer," INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));CHKERRQ(ierr); 1894 ierr = PetscViewerASCIIPrintf(viewer," INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(36));CHKERRQ(ierr); 1895 ierr = PetscViewerASCIIPrintf(viewer," INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d \n",mumps->id.INFOG(37));CHKERRQ(ierr); 1896 ierr = PetscViewerASCIIPrintf(viewer," INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(38));CHKERRQ(ierr); 1897 ierr = PetscViewerASCIIPrintf(viewer," INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d \n",mumps->id.INFOG(39));CHKERRQ(ierr); 1898 } 1899 } 1900 } 1901 PetscFunctionReturn(0); 1902 } 1903 1904 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info) 1905 { 1906 Mat_MUMPS *mumps =(Mat_MUMPS*)A->data; 1907 1908 PetscFunctionBegin; 1909 info->block_size = 1.0; 1910 info->nz_allocated = mumps->id.INFOG(20); 1911 info->nz_used = mumps->id.INFOG(20); 1912 info->nz_unneeded = 0.0; 1913 info->assemblies = 0.0; 1914 info->mallocs = 0.0; 1915 info->memory = 0.0; 1916 info->fill_ratio_given = 0; 1917 info->fill_ratio_needed = 0; 1918 info->factor_mallocs = 0; 1919 PetscFunctionReturn(0); 1920 } 1921 1922 /* -------------------------------------------------------------------------------------------*/ 1923 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 1924 { 1925 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1926 const PetscInt *idxs; 1927 PetscInt size,i; 1928 PetscErrorCode ierr; 1929 1930 PetscFunctionBegin; 1931 ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr); 1932 if (mumps->petsc_size > 1) { 1933 PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */ 1934 1935 ls = mumps->myid ? (size ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 1936 ierr = MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);CHKERRQ(ierr); 1937 if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc\n"); 1938 } 1939 if (mumps->id.size_schur != size) { 1940 ierr = PetscFree2(mumps->id.listvar_schur,mumps->id.schur);CHKERRQ(ierr); 1941 mumps->id.size_schur = size; 1942 mumps->id.schur_lld = size; 1943 ierr = PetscMalloc2(size,&mumps->id.listvar_schur,size*size,&mumps->id.schur);CHKERRQ(ierr); 1944 } 1945 1946 /* Schur complement matrix */ 1947 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.size_schur,(PetscScalar*)mumps->id.schur,&F->schur);CHKERRQ(ierr); 1948 if (mumps->sym == 1) { 1949 ierr = MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 1950 } 1951 1952 /* MUMPS expects Fortran style indices */ 1953 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 1954 ierr = PetscArraycpy(mumps->id.listvar_schur,idxs,size);CHKERRQ(ierr); 1955 for (i=0;i<size;i++) mumps->id.listvar_schur[i]++; 1956 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 1957 if (mumps->petsc_size > 1) { 1958 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 1959 } else { 1960 if (F->factortype == MAT_FACTOR_LU) { 1961 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 1962 } else { 1963 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 1964 } 1965 } 1966 /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 1967 mumps->id.ICNTL(26) = -1; 1968 PetscFunctionReturn(0); 1969 } 1970 1971 /* -------------------------------------------------------------------------------------------*/ 1972 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S) 1973 { 1974 Mat St; 1975 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1976 PetscScalar *array; 1977 #if defined(PETSC_USE_COMPLEX) 1978 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 1979 #endif 1980 PetscErrorCode ierr; 1981 1982 PetscFunctionBegin; 1983 if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 1984 ierr = MatCreate(PETSC_COMM_SELF,&St);CHKERRQ(ierr); 1985 ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);CHKERRQ(ierr); 1986 ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr); 1987 ierr = MatSetUp(St);CHKERRQ(ierr); 1988 ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr); 1989 if (!mumps->sym) { /* MUMPS always return a full matrix */ 1990 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1991 PetscInt i,j,N=mumps->id.size_schur; 1992 for (i=0;i<N;i++) { 1993 for (j=0;j<N;j++) { 1994 #if !defined(PETSC_USE_COMPLEX) 1995 PetscScalar val = mumps->id.schur[i*N+j]; 1996 #else 1997 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 1998 #endif 1999 array[j*N+i] = val; 2000 } 2001 } 2002 } else { /* stored by columns */ 2003 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 2004 } 2005 } else { /* either full or lower-triangular (not packed) */ 2006 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2007 PetscInt i,j,N=mumps->id.size_schur; 2008 for (i=0;i<N;i++) { 2009 for (j=i;j<N;j++) { 2010 #if !defined(PETSC_USE_COMPLEX) 2011 PetscScalar val = mumps->id.schur[i*N+j]; 2012 #else 2013 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2014 #endif 2015 array[i*N+j] = val; 2016 array[j*N+i] = val; 2017 } 2018 } 2019 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2020 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 2021 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2022 PetscInt i,j,N=mumps->id.size_schur; 2023 for (i=0;i<N;i++) { 2024 for (j=0;j<i+1;j++) { 2025 #if !defined(PETSC_USE_COMPLEX) 2026 PetscScalar val = mumps->id.schur[i*N+j]; 2027 #else 2028 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2029 #endif 2030 array[i*N+j] = val; 2031 array[j*N+i] = val; 2032 } 2033 } 2034 } 2035 } 2036 ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr); 2037 *S = St; 2038 PetscFunctionReturn(0); 2039 } 2040 2041 /* -------------------------------------------------------------------------------------------*/ 2042 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival) 2043 { 2044 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2045 2046 PetscFunctionBegin; 2047 mumps->id.ICNTL(icntl) = ival; 2048 PetscFunctionReturn(0); 2049 } 2050 2051 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival) 2052 { 2053 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2054 2055 PetscFunctionBegin; 2056 *ival = mumps->id.ICNTL(icntl); 2057 PetscFunctionReturn(0); 2058 } 2059 2060 /*@ 2061 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2062 2063 Logically Collective on Mat 2064 2065 Input Parameters: 2066 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2067 . icntl - index of MUMPS parameter array ICNTL() 2068 - ival - value of MUMPS ICNTL(icntl) 2069 2070 Options Database: 2071 . -mat_mumps_icntl_<icntl> <ival> 2072 2073 Level: beginner 2074 2075 References: 2076 . MUMPS Users' Guide 2077 2078 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2079 @*/ 2080 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival) 2081 { 2082 PetscErrorCode ierr; 2083 2084 PetscFunctionBegin; 2085 PetscValidType(F,1); 2086 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2087 PetscValidLogicalCollectiveInt(F,icntl,2); 2088 PetscValidLogicalCollectiveInt(F,ival,3); 2089 ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr); 2090 PetscFunctionReturn(0); 2091 } 2092 2093 /*@ 2094 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2095 2096 Logically Collective on Mat 2097 2098 Input Parameters: 2099 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2100 - icntl - index of MUMPS parameter array ICNTL() 2101 2102 Output Parameter: 2103 . ival - value of MUMPS ICNTL(icntl) 2104 2105 Level: beginner 2106 2107 References: 2108 . MUMPS Users' Guide 2109 2110 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2111 @*/ 2112 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival) 2113 { 2114 PetscErrorCode ierr; 2115 2116 PetscFunctionBegin; 2117 PetscValidType(F,1); 2118 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2119 PetscValidLogicalCollectiveInt(F,icntl,2); 2120 PetscValidIntPointer(ival,3); 2121 ierr = PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2122 PetscFunctionReturn(0); 2123 } 2124 2125 /* -------------------------------------------------------------------------------------------*/ 2126 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val) 2127 { 2128 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2129 2130 PetscFunctionBegin; 2131 mumps->id.CNTL(icntl) = val; 2132 PetscFunctionReturn(0); 2133 } 2134 2135 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val) 2136 { 2137 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2138 2139 PetscFunctionBegin; 2140 *val = mumps->id.CNTL(icntl); 2141 PetscFunctionReturn(0); 2142 } 2143 2144 /*@ 2145 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2146 2147 Logically Collective on Mat 2148 2149 Input Parameters: 2150 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2151 . icntl - index of MUMPS parameter array CNTL() 2152 - val - value of MUMPS CNTL(icntl) 2153 2154 Options Database: 2155 . -mat_mumps_cntl_<icntl> <val> 2156 2157 Level: beginner 2158 2159 References: 2160 . MUMPS Users' Guide 2161 2162 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2163 @*/ 2164 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val) 2165 { 2166 PetscErrorCode ierr; 2167 2168 PetscFunctionBegin; 2169 PetscValidType(F,1); 2170 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2171 PetscValidLogicalCollectiveInt(F,icntl,2); 2172 PetscValidLogicalCollectiveReal(F,val,3); 2173 ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr); 2174 PetscFunctionReturn(0); 2175 } 2176 2177 /*@ 2178 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2179 2180 Logically Collective on Mat 2181 2182 Input Parameters: 2183 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2184 - icntl - index of MUMPS parameter array CNTL() 2185 2186 Output Parameter: 2187 . val - value of MUMPS CNTL(icntl) 2188 2189 Level: beginner 2190 2191 References: 2192 . MUMPS Users' Guide 2193 2194 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2195 @*/ 2196 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val) 2197 { 2198 PetscErrorCode ierr; 2199 2200 PetscFunctionBegin; 2201 PetscValidType(F,1); 2202 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2203 PetscValidLogicalCollectiveInt(F,icntl,2); 2204 PetscValidRealPointer(val,3); 2205 ierr = PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2206 PetscFunctionReturn(0); 2207 } 2208 2209 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info) 2210 { 2211 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2212 2213 PetscFunctionBegin; 2214 *info = mumps->id.INFO(icntl); 2215 PetscFunctionReturn(0); 2216 } 2217 2218 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog) 2219 { 2220 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2221 2222 PetscFunctionBegin; 2223 *infog = mumps->id.INFOG(icntl); 2224 PetscFunctionReturn(0); 2225 } 2226 2227 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo) 2228 { 2229 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2230 2231 PetscFunctionBegin; 2232 *rinfo = mumps->id.RINFO(icntl); 2233 PetscFunctionReturn(0); 2234 } 2235 2236 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog) 2237 { 2238 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2239 2240 PetscFunctionBegin; 2241 *rinfog = mumps->id.RINFOG(icntl); 2242 PetscFunctionReturn(0); 2243 } 2244 2245 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS) 2246 { 2247 PetscErrorCode ierr; 2248 Mat Bt = NULL,Btseq = NULL; 2249 PetscBool flg; 2250 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2251 PetscScalar *aa; 2252 PetscInt spnr,*ia,*ja; 2253 2254 PetscFunctionBegin; 2255 PetscValidIntPointer(spRHS,2); 2256 ierr = PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);CHKERRQ(ierr); 2257 if (flg) { 2258 ierr = MatTransposeGetMat(spRHS,&Bt);CHKERRQ(ierr); 2259 } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be type MATTRANSPOSEMAT matrix"); 2260 2261 ierr = MatMumpsSetIcntl(F,30,1);CHKERRQ(ierr); 2262 2263 if (mumps->petsc_size > 1) { 2264 Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data; 2265 Btseq = b->A; 2266 } else { 2267 Btseq = Bt; 2268 } 2269 2270 if (!mumps->myid) { 2271 ierr = MatSeqAIJGetArray(Btseq,&aa);CHKERRQ(ierr); 2272 ierr = MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2273 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2274 2275 mumps->id.irhs_ptr = ia; 2276 mumps->id.irhs_sparse = ja; 2277 mumps->id.nz_rhs = ia[spnr] - 1; 2278 mumps->id.rhs_sparse = (MumpsScalar*)aa; 2279 } else { 2280 mumps->id.irhs_ptr = NULL; 2281 mumps->id.irhs_sparse = NULL; 2282 mumps->id.nz_rhs = 0; 2283 mumps->id.rhs_sparse = NULL; 2284 } 2285 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2286 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2287 2288 /* solve phase */ 2289 /*-------------*/ 2290 mumps->id.job = JOB_SOLVE; 2291 PetscMUMPS_c(mumps); 2292 if (mumps->id.INFOG(1) < 0) 2293 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 2294 2295 if (!mumps->myid) { 2296 ierr = MatSeqAIJRestoreArray(Btseq,&aa);CHKERRQ(ierr); 2297 ierr = MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2298 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2299 } 2300 PetscFunctionReturn(0); 2301 } 2302 2303 /*@ 2304 MatMumpsGetInverse - Get user-specified set of entries in inverse of A 2305 2306 Logically Collective on Mat 2307 2308 Input Parameters: 2309 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2310 - spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0] 2311 2312 Output Parameter: 2313 . spRHS - requested entries of inverse of A 2314 2315 Level: beginner 2316 2317 References: 2318 . MUMPS Users' Guide 2319 2320 .seealso: MatGetFactor(), MatCreateTranspose() 2321 @*/ 2322 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS) 2323 { 2324 PetscErrorCode ierr; 2325 2326 PetscFunctionBegin; 2327 PetscValidType(F,1); 2328 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2329 ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr); 2330 PetscFunctionReturn(0); 2331 } 2332 2333 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST) 2334 { 2335 PetscErrorCode ierr; 2336 Mat spRHS; 2337 2338 PetscFunctionBegin; 2339 ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr); 2340 ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr); 2341 ierr = MatDestroy(&spRHS);CHKERRQ(ierr); 2342 PetscFunctionReturn(0); 2343 } 2344 2345 /*@ 2346 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T 2347 2348 Logically Collective on Mat 2349 2350 Input Parameters: 2351 + F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface 2352 - spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0] 2353 2354 Output Parameter: 2355 . spRHST - requested entries of inverse of A^T 2356 2357 Level: beginner 2358 2359 References: 2360 . MUMPS Users' Guide 2361 2362 .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse() 2363 @*/ 2364 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST) 2365 { 2366 PetscErrorCode ierr; 2367 PetscBool flg; 2368 2369 PetscFunctionBegin; 2370 PetscValidType(F,1); 2371 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2372 ierr = PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 2373 if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix"); 2374 2375 ierr = PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));CHKERRQ(ierr); 2376 PetscFunctionReturn(0); 2377 } 2378 2379 /*@ 2380 MatMumpsGetInfo - Get MUMPS parameter INFO() 2381 2382 Logically Collective on Mat 2383 2384 Input Parameters: 2385 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2386 - icntl - index of MUMPS parameter array INFO() 2387 2388 Output Parameter: 2389 . ival - value of MUMPS INFO(icntl) 2390 2391 Level: beginner 2392 2393 References: 2394 . MUMPS Users' Guide 2395 2396 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2397 @*/ 2398 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival) 2399 { 2400 PetscErrorCode ierr; 2401 2402 PetscFunctionBegin; 2403 PetscValidType(F,1); 2404 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2405 PetscValidIntPointer(ival,3); 2406 ierr = PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2407 PetscFunctionReturn(0); 2408 } 2409 2410 /*@ 2411 MatMumpsGetInfog - Get MUMPS parameter INFOG() 2412 2413 Logically Collective on Mat 2414 2415 Input Parameters: 2416 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2417 - icntl - index of MUMPS parameter array INFOG() 2418 2419 Output Parameter: 2420 . ival - value of MUMPS INFOG(icntl) 2421 2422 Level: beginner 2423 2424 References: 2425 . MUMPS Users' Guide 2426 2427 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2428 @*/ 2429 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival) 2430 { 2431 PetscErrorCode ierr; 2432 2433 PetscFunctionBegin; 2434 PetscValidType(F,1); 2435 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2436 PetscValidIntPointer(ival,3); 2437 ierr = PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2438 PetscFunctionReturn(0); 2439 } 2440 2441 /*@ 2442 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 2443 2444 Logically Collective on Mat 2445 2446 Input Parameters: 2447 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2448 - icntl - index of MUMPS parameter array RINFO() 2449 2450 Output Parameter: 2451 . val - value of MUMPS RINFO(icntl) 2452 2453 Level: beginner 2454 2455 References: 2456 . MUMPS Users' Guide 2457 2458 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2459 @*/ 2460 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val) 2461 { 2462 PetscErrorCode ierr; 2463 2464 PetscFunctionBegin; 2465 PetscValidType(F,1); 2466 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2467 PetscValidRealPointer(val,3); 2468 ierr = PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2469 PetscFunctionReturn(0); 2470 } 2471 2472 /*@ 2473 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 2474 2475 Logically Collective on Mat 2476 2477 Input Parameters: 2478 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2479 - icntl - index of MUMPS parameter array RINFOG() 2480 2481 Output Parameter: 2482 . val - value of MUMPS RINFOG(icntl) 2483 2484 Level: beginner 2485 2486 References: 2487 . MUMPS Users' Guide 2488 2489 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2490 @*/ 2491 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val) 2492 { 2493 PetscErrorCode ierr; 2494 2495 PetscFunctionBegin; 2496 PetscValidType(F,1); 2497 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2498 PetscValidRealPointer(val,3); 2499 ierr = PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2500 PetscFunctionReturn(0); 2501 } 2502 2503 /*MC 2504 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 2505 distributed and sequential matrices via the external package MUMPS. 2506 2507 Works with MATAIJ and MATSBAIJ matrices 2508 2509 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 2510 2511 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below. 2512 2513 Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver 2514 2515 Options Database Keys: 2516 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 2517 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 2518 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 2519 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 2520 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 2521 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis 2522 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 2523 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 2524 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 2525 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 2526 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 2527 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 2528 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 2529 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 2530 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can allocate per processor 2531 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 2532 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 2533 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix 2534 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering 2535 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 2536 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 2537 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 2538 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 2539 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 2540 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 2541 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 2542 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 2543 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 2544 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 2545 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 2546 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 2547 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 2548 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 2549 Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 2550 2551 Level: beginner 2552 2553 Notes: 2554 MUMPS Cholesky does not handle (complex) Hermitian matrices http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf so using it will error if the matrix is Hermitian. 2555 2556 When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling 2557 $ KSPGetPC(ksp,&pc); 2558 $ PCFactorGetMatrix(pc,&mat); 2559 $ MatMumpsGetInfo(mat,....); 2560 $ MatMumpsGetInfog(mat,....); etc. 2561 Or you can run with -ksp_error_if_not_converged and the program will be stopped and the information printed in the error message. 2562 2563 Two modes to run MUMPS/PETSc with OpenMP 2564 2565 $ Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 2566 $ threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". 2567 2568 $ -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example, 2569 $ if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 2570 2571 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 2572 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc 2573 (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 2574 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 2575 (PETSc will automatically try to utilized a threaded BLAS if --with-openmp is provided). 2576 2577 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 2578 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 2579 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 2580 are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set 2581 by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs. 2582 In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets, 2583 if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind 2584 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half 2585 cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 2586 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 2587 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 2588 examine the mapping result. 2589 2590 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts, 2591 for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc 2592 calls omp_set_num_threads(m) internally before calling MUMPS. 2593 2594 References: 2595 + 1. - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 2596 - 2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017. 2597 2598 .seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCGetFactor(), PCFactorGetMatrix() 2599 2600 M*/ 2601 2602 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type) 2603 { 2604 PetscFunctionBegin; 2605 *type = MATSOLVERMUMPS; 2606 PetscFunctionReturn(0); 2607 } 2608 2609 /* MatGetFactor for Seq and MPI AIJ matrices */ 2610 static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F) 2611 { 2612 Mat B; 2613 PetscErrorCode ierr; 2614 Mat_MUMPS *mumps; 2615 PetscBool isSeqAIJ; 2616 2617 PetscFunctionBegin; 2618 /* Create the factorization matrix */ 2619 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr); 2620 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2621 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2622 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2623 ierr = MatSetUp(B);CHKERRQ(ierr); 2624 2625 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2626 2627 B->ops->view = MatView_MUMPS; 2628 B->ops->getinfo = MatGetInfo_MUMPS; 2629 2630 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2631 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2632 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2633 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2634 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2635 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2636 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2637 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2638 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2639 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2640 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2641 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2642 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2643 2644 if (ftype == MAT_FACTOR_LU) { 2645 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 2646 B->factortype = MAT_FACTOR_LU; 2647 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 2648 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 2649 mumps->sym = 0; 2650 } else { 2651 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 2652 B->factortype = MAT_FACTOR_CHOLESKY; 2653 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 2654 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 2655 #if defined(PETSC_USE_COMPLEX) 2656 mumps->sym = 2; 2657 #else 2658 if (A->spd_set && A->spd) mumps->sym = 1; 2659 else mumps->sym = 2; 2660 #endif 2661 } 2662 2663 /* set solvertype */ 2664 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2665 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2666 2667 B->ops->destroy = MatDestroy_MUMPS; 2668 B->data = (void*)mumps; 2669 2670 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2671 2672 *F = B; 2673 PetscFunctionReturn(0); 2674 } 2675 2676 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 2677 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F) 2678 { 2679 Mat B; 2680 PetscErrorCode ierr; 2681 Mat_MUMPS *mumps; 2682 PetscBool isSeqSBAIJ; 2683 2684 PetscFunctionBegin; 2685 if (ftype != MAT_FACTOR_CHOLESKY) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Cannot use PETSc SBAIJ matrices with MUMPS LU, use AIJ matrix"); 2686 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr); 2687 /* Create the factorization matrix */ 2688 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2689 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2690 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2691 ierr = MatSetUp(B);CHKERRQ(ierr); 2692 2693 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2694 if (isSeqSBAIJ) { 2695 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 2696 } else { 2697 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 2698 } 2699 2700 B->ops->getinfo = MatGetInfo_External; 2701 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 2702 B->ops->view = MatView_MUMPS; 2703 2704 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2705 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2706 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2707 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2708 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2709 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2710 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2711 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2712 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2713 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2714 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2715 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2716 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2717 2718 B->factortype = MAT_FACTOR_CHOLESKY; 2719 #if defined(PETSC_USE_COMPLEX) 2720 mumps->sym = 2; 2721 #else 2722 if (A->spd_set && A->spd) mumps->sym = 1; 2723 else mumps->sym = 2; 2724 #endif 2725 2726 /* set solvertype */ 2727 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2728 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2729 2730 B->ops->destroy = MatDestroy_MUMPS; 2731 B->data = (void*)mumps; 2732 2733 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2734 2735 *F = B; 2736 PetscFunctionReturn(0); 2737 } 2738 2739 static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F) 2740 { 2741 Mat B; 2742 PetscErrorCode ierr; 2743 Mat_MUMPS *mumps; 2744 PetscBool isSeqBAIJ; 2745 2746 PetscFunctionBegin; 2747 /* Create the factorization matrix */ 2748 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr); 2749 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2750 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2751 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2752 ierr = MatSetUp(B);CHKERRQ(ierr); 2753 2754 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2755 if (ftype == MAT_FACTOR_LU) { 2756 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 2757 B->factortype = MAT_FACTOR_LU; 2758 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 2759 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 2760 mumps->sym = 0; 2761 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead\n"); 2762 2763 B->ops->getinfo = MatGetInfo_External; 2764 B->ops->view = MatView_MUMPS; 2765 2766 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2767 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2768 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2769 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2770 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2771 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2772 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2773 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2774 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2775 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2776 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2777 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2778 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2779 2780 /* set solvertype */ 2781 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2782 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2783 2784 B->ops->destroy = MatDestroy_MUMPS; 2785 B->data = (void*)mumps; 2786 2787 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2788 2789 *F = B; 2790 PetscFunctionReturn(0); 2791 } 2792 2793 /* MatGetFactor for Seq and MPI SELL matrices */ 2794 static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F) 2795 { 2796 Mat B; 2797 PetscErrorCode ierr; 2798 Mat_MUMPS *mumps; 2799 PetscBool isSeqSELL; 2800 2801 PetscFunctionBegin; 2802 /* Create the factorization matrix */ 2803 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);CHKERRQ(ierr); 2804 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2805 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2806 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2807 ierr = MatSetUp(B);CHKERRQ(ierr); 2808 2809 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2810 2811 B->ops->view = MatView_MUMPS; 2812 B->ops->getinfo = MatGetInfo_MUMPS; 2813 2814 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2815 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2816 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2817 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2818 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2819 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2820 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2821 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2822 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2823 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2824 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2825 2826 if (ftype == MAT_FACTOR_LU) { 2827 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 2828 B->factortype = MAT_FACTOR_LU; 2829 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 2830 else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 2831 mumps->sym = 0; 2832 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 2833 2834 /* set solvertype */ 2835 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2836 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2837 2838 B->ops->destroy = MatDestroy_MUMPS; 2839 B->data = (void*)mumps; 2840 2841 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2842 2843 *F = B; 2844 PetscFunctionReturn(0); 2845 } 2846 2847 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 2848 { 2849 PetscErrorCode ierr; 2850 2851 PetscFunctionBegin; 2852 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2853 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2854 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2855 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2856 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 2857 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2858 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2859 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2860 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2861 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 2862 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);CHKERRQ(ierr); 2863 PetscFunctionReturn(0); 2864 } 2865 2866