/*
    Provides an interface to the MUMPS sparse solver
*/

#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sell/mpi/mpisell.h>

EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#include <cmumps_c.h>
#else
#include <zmumps_c.h>
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#include <smumps_c.h>
#else
#include <dmumps_c.h>
#endif
#endif
EXTERN_C_END
#define JOB_INIT -1
#define JOB_FACTSYMBOLIC 1
#define JOB_FACTNUMERIC 2
#define JOB_SOLVE 3
#define JOB_END -2

/* calls to MUMPS */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c cmumps_c
#else
#define MUMPS_c zmumps_c
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c smumps_c
#else
#define MUMPS_c dmumps_c
#endif
#endif

/* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
#define PetscMUMPS_c(mumps) \
  do { \
    if (mumps->use_petsc_omp_support) { \
      if (mumps->is_omp_master) { \
        ierr = PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl);CHKERRQ(ierr); \
        MUMPS_c(&mumps->id); \
        ierr = PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl);CHKERRQ(ierr); \
      } \
      ierr = PetscOmpCtrlBarrier(mumps->omp_ctrl);CHKERRQ(ierr); \
    } else { \
      MUMPS_c(&mumps->id); \
    } \
  } while(0)
#else
#define PetscMUMPS_c(mumps) \
  do { MUMPS_c(&mumps->id); } while (0)
#endif

/* declare MumpsScalar */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MumpsScalar mumps_complex
#else
#define MumpsScalar mumps_double_complex
#endif
#else
#define MumpsScalar PetscScalar
#endif

/* macros s.t. indices match MUMPS documentation */
#define ICNTL(I) icntl[(I)-1]
#define CNTL(I) cntl[(I)-1]
#define INFOG(I) infog[(I)-1]
#define INFO(I) info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I) rinfo[(I)-1]
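/*
   Example: with the macros above, the 1-based parameter numbering used in the MUMPS manual
   can be applied directly to the 0-based C arrays of the MUMPS id structure, e.g.

     mumps->id.ICNTL(24) = 1;    expands to    mumps->id.icntl[23] = 1;
     mumps->id.INFOG(1)          reads         mumps->id.infog[0]
*/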
typedef struct {
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;
#else
  ZMUMPS_STRUC_C id;
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
#else
  DMUMPS_STRUC_C id;
#endif
#endif

  MatStructure matstruc;
  PetscMPIInt  myid,petsc_size;
  PetscInt     *irn,*jcn,nz,sym;
  PetscScalar  *val;
  MPI_Comm     mumps_comm;
  PetscInt     ICNTL9_pre;         /* check if ICNTL(9) is changed from the previous MatSolve */
  VecScatter   scat_rhs, scat_sol; /* used by MatSolve() */
  Vec          b_seq,x_seq;
  PetscInt     ninfo,*info;        /* display INFO */
  PetscInt     sizeredrhs;
  PetscScalar  *schur_sol;
  PetscInt     schur_sizesol;

  PetscBool    use_petsc_omp_support;
  PetscOmpCtrl omp_ctrl;            /* an OpenMP controller through which blocked processes release their CPUs (MPI_Barrier does not give this guarantee) */
  MPI_Comm     petsc_comm,omp_comm; /* petsc_comm is the PETSc matrix's comm */
  PetscMPIInt  mpinz;               /* on the master rank, nz = sum(mpinz) over omp_comm; on other ranks, mpinz = nz */
  PetscMPIInt  omp_comm_size;
  PetscBool    is_omp_master;       /* is this rank the master of omp_comm? */
  PetscMPIInt  *recvcount,*displs;

  PetscErrorCode (*ConvertToTriples)(Mat, int, MatReuse, int*, int**, int**, PetscScalar**);
} Mat_MUMPS;

extern PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*);
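/*
   Example (illustrative): ConvertToTriples is set to one of the MatConvertToTriples_*
   routines in this file according to the Mat type and the factorization, and is invoked
   with shift=1 because MUMPS expects Fortran-style 1-based indices, as in
   MatFactorNumeric_MUMPS() below:

     ierr = (*mumps->ConvertToTriples)(A,1,MAT_REUSE_MATRIX,&mumps->nz,&mumps->irn,&mumps->jcn,&mumps->val);CHKERRQ(ierr);
*/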
static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscFree2(mumps->id.listvar_schur,mumps->id.schur);CHKERRQ(ierr);
  ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
  ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
  mumps->id.size_schur = 0;
  mumps->id.schur_lld  = 0;
  mumps->id.ICNTL(19)  = 0;
  PetscFunctionReturn(0);
}

/* solve with rhs in mumps->id.redrhs and return in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS            *mumps=(Mat_MUMPS*)F->data;
  Mat                  S,B,X;
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;
  PetscErrorCode       ierr;

  PetscFunctionBegin;
  ierr = MatFactorFactorizeSchurComplement(F);CHKERRQ(ierr);
  ierr = MatFactorGetSchurComplement(F,&S,&schurstatus);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);CHKERRQ(ierr);
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);CHKERRQ(ierr);
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatMatSolveTranspose(S,B,X);CHKERRQ(ierr);
    } else {
      ierr = MatMatSolve(S,B,X);CHKERRQ(ierr);
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    sizesol = mumps->id.nrhs*mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
      ierr = PetscMalloc1(sizesol,&mumps->schur_sol);CHKERRQ(ierr);
      mumps->schur_sizesol = sizesol;
    }
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);CHKERRQ(ierr);
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatTransposeMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr);
    } else {
      ierr = MatMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr);
    }
    ierr = MatCopy(X,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %D",F->schur_status);
    break;
  }
  ierr = MatFactorRestoreSchurComplement(F,&S,schurstatus);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = MatDestroy(&X);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)F->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when the Schur complement has not been computed */
    PetscFunctionReturn(0);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = mumps->id.size_schur;
      ierr = PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);CHKERRQ(ierr);
      mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs;
    }
    mumps->id.ICNTL(26) = 1; /* condensation phase */
  } else { /* prepare for the expansion step */
    /* solve the Schur complement (this has to be done by the MUMPS user, so basically us) */
    ierr = MatMumpsSolveSchur_Private(F);CHKERRQ(ierr);
    mumps->id.ICNTL(26) = 2; /* expansion phase */
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free the MUMPS internal array for redrhs if we have solved for multiple rhs, in order to save memory */
    if (mumps->id.nrhs > 1) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = 0;
      mumps->sizeredrhs = 0;
    }
  }
  PetscFunctionReturn(0);
}

/*
  MatConvertToTriples_A_B - convert a PETSc matrix to triples: row[nz], col[nz], val[nz]

  input:
    A       - matrix in aij, baij or sbaij format
    shift   - 0: C style output triples; 1: Fortran style output triples
    reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triples
              MAT_REUSE_MATRIX:   only the values in the v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and col indices, matrix values (matrix triples)

  The returned values r, c, and sometimes v are obtained in a single PetscMalloc(), and in MatDestroy_MUMPS() they
  are freed with a single PetscFree(mumps->irn). This is not ideal code: because v is only SOMETIMES part of the
  mumps->irn allocation, the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
*/
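/*
   Worked example (illustrative): for the 2x2 SeqAIJ matrix [[4,1],[0,3]],
   MatConvertToTriples_seqaij_seqaij below with shift=1 (Fortran style) produces

     nnz = 3,  r = {1,1,2},  c = {1,2,2},  v = {4.,1.,3.}

   where r and c share one allocation (col = row + nz) and v aliases the AIJ value
   array aa->a directly, so the MAT_REUSE_MATRIX case needs no work here at all.
*/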
PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscInt *ai,*aj,*ajj,M=A->rmap->n;
  PetscInt       nz,rnz,i,j;
  PetscErrorCode ierr;
  PetscInt       *row,*col;
  Mat_SeqAIJ     *aa=(Mat_SeqAIJ*)A->data;

  PetscFunctionBegin;
  *v = aa->a;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz;
    ai   = aa->i;
    aj   = aa->j;
    *nnz = nz;
    ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr);
    col  = row + nz;

    nz = 0;
    for (i=0; i<M; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        row[nz] = i+shift; col[nz++] = ajj[j] + shift;
      }
    }
    *r = row; *c = col;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
  PetscInt    *ptr;

  PetscFunctionBegin;
  *v = a->val;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscInt       nz,i,j,row;
    PetscErrorCode ierr;

    nz   = a->sliidx[a->totalslices];
    *nnz = nz;
    ierr = PetscMalloc1(2*nz, &ptr);CHKERRQ(ierr);
    *r   = ptr;
    *c   = ptr + nz;

    for (i=0; i<a->totalslices; i++) {
      for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
        *ptr++ = 8*i + row + shift;
      }
    }
    for (i=0;i<nz;i++) *ptr++ = a->colidx[i] + shift;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  Mat_SeqBAIJ    *aa=(Mat_SeqBAIJ*)A->data;
  const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2;
  PetscInt       bs,M,nz,idx=0,rnz,i,j,k,m;
  PetscErrorCode ierr;
  PetscInt       *row,*col;

  PetscFunctionBegin;
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  M    = A->rmap->N/bs;
  *v   = aa->a;
  if (reuse == MAT_INITIAL_MATRIX) {
    ai   = aa->i; aj = aa->j;
    nz   = bs2*aa->nz;
    *nnz = nz;
    ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr);
    col  = row + nz;

    for (i=0; i<M; i++) {
      ajj = aj + ai[i];
      rnz = ai[i+1] - ai[i];
      for (k=0; k<rnz; k++) {
        for (j=0; j<bs; j++) {
          for (m=0; m<bs; m++) {
            row[idx]   = i*bs + m + shift;
            col[idx++] = bs*(ajj[k]) + j + shift;
          }
        }
      }
    }
    *r = row; *c = col;
  }
  PetscFunctionReturn(0);
}
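/*
   Worked example (illustrative): in MatConvertToTriples_seqbaij_seqaij above, with bs=2 a
   single block at block row i and block column ajj[k] expands into bs2=4 triples, emitted
   in the order

     (2*i, c), (2*i+1, c), (2*i, c+1), (2*i+1, c+1)   with c = 2*ajj[k]   (plus shift)

   which matches the column-major layout of the bs2 values stored per block in aa->a.
*/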
PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r,int **c,PetscScalar **v)
{
  const PetscInt *ai, *aj,*ajj;
  PetscInt       nz,rnz,i,j,k,m,bs;
  PetscErrorCode ierr;
  PetscInt       *row,*col;
  PetscScalar    *val;
  Mat_SeqSBAIJ   *aa=(Mat_SeqSBAIJ*)A->data;
  const PetscInt bs2=aa->bs2,mbs=aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool      hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
  if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  ai   = aa->i;
  aj   = aa->j;
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz;
    ierr = PetscMalloc((2*bs2*nz*sizeof(PetscInt)+(bs>1?bs2*nz*sizeof(PetscScalar):0)), &row);CHKERRQ(ierr);
    col  = row + bs2*nz;
    if (bs>1) val = (PetscScalar*)(col + bs2*nz);
    else      val = aa->a;

    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }

  nz = 0;
  if (bs>1) {
    for (i=0; i<mbs; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            if (ajj[j]>i || k>=m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                row[nz] = i*bs + m + shift;
                col[nz] = ajj[j]*bs + k + shift;
              }
              val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs];
            }
          }
        }
      }
    }
  } else if (reuse == MAT_INITIAL_MATRIX) {
    for (i=0; i<mbs; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        row[nz] = i+shift; col[nz++] = ajj[j] + shift;
      }
    }
    if (nz != aa->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %D != %D",nz,aa->nz);
  }
  *nnz = nz;
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscInt    *ai,*aj,*ajj,*adiag,M=A->rmap->n;
  PetscInt          nz,rnz,i,j;
  const PetscScalar *av,*v1;
  PetscScalar       *val;
  PetscErrorCode    ierr;
  PetscInt          *row,*col;
  Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
  PetscBool         missing;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
  if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  ai    = aa->i; aj = aa->j; av = aa->a;
  adiag = aa->diag;
  ierr  = MatMissingDiagonal_SeqAIJ(A,&missing,&i);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    /* count nz in the upper triangular part of A */
    nz = 0;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            nz++;
          }
        } else {
          nz += ai[i+1] - adiag[i];
        }
      }
    } else {
      for (i=0; i<M; i++) nz += ai[i+1] - adiag[i];
    }
    *nnz = nz;

    ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr);
    col  = row + nz;
    val  = (PetscScalar*)(col + nz);

    nz = 0;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            row[nz] = i+shift;
            col[nz] = aj[j]+shift;
            val[nz] = av[j];
            nz++;
          }
        } else {
          rnz = ai[i+1] - adiag[i];
          ajj = aj + adiag[i];
          v1  = av + adiag[i];
          for (j=0; j<rnz; j++) {
            row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i=0; i<M; i++) {
        rnz = ai[i+1] - adiag[i];
        ajj = aj + adiag[i];
        v1  = av + adiag[i];
        for (j=0; j<rnz; j++) {
          row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j];
        }
      }
    }
    *r = row; *c = col; *v = val;
  } else {
    nz = 0; val = *v;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            val[nz++] = av[j];
          }
        } else {
          rnz = ai[i+1] - adiag[i];
          v1  = av + adiag[i];
          for (j=0; j<rnz; j++) {
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i=0; i<M; i++) {
        rnz = ai[i+1] - adiag[i];
        v1  = av + adiag[i];
        for (j=0; j<rnz; j++) {
          val[nz++] = v1[j];
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
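/*
   Worked example (illustrative): for the symmetric 2x2 SeqAIJ matrix [[2,1],[1,3]],
   MatConvertToTriples_seqaij_seqsbaij above keeps only the diagonal and upper triangle;
   with shift=1 it produces nnz = 3, r = {1,1,2}, c = {1,2,2}, v = {2.,1.,3.}.
*/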
PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r,int **c, PetscScalar **v)
{
  const PetscInt    *ai,*aj,*bi,*bj,*garray,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt          rstart,nz,bs,i,j,k,m,jj,irow,countA,countB;
  PetscInt          *row,*col;
  const PetscScalar *av,*bv,*v1,*v2;
  PetscScalar       *val;
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ*)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
  const PetscInt    bs2=aa->bs2,mbs=aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
  if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  ierr   = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  rstart = A->rmap->rstart;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;
  av = aa->a;
  bv = bb->a;

  garray = mat->garray;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz + bb->nz;
    ierr = PetscMalloc((2*bs2*nz*sizeof(PetscInt)+bs2*nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr);
    col  = row + bs2*nz;
    val  = (PetscScalar*)(col + bs2*nz);

    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }

  jj = 0; irow = rstart;
  for (i=0; i<mbs; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i]*bs2;
    v2     = bv + bi[i]*bs2;

    if (bs>1) {
      /* A-part */
      for (j=0; j<countA; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            if (rstart + ajj[j]*bs>irow || k>=m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                row[jj] = irow + m + shift; col[jj] = rstart + ajj[j]*bs + k + shift;
              }
              val[jj++] = v1[j*bs2 + m + k*bs];
            }
          }
        }
      }

      /* B-part */
      for (j=0; j < countB; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            if (reuse == MAT_INITIAL_MATRIX) {
              row[jj] = irow + m + shift; col[jj] = garray[bjj[j]]*bs + k + shift;
            }
            val[jj++] = v2[j*bs2 + m + k*bs];
          }
        }
      }
    } else {
      /* A-part */
      for (j=0; j<countA; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
        }
        val[jj++] = v1[j];
      }

      /* B-part */
      for (j=0; j < countB; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
        }
        val[jj++] = v2[j];
      }
    }
    irow+=bs;
  }
  *nnz = jj;
  PetscFunctionReturn(0);
}
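/*
   Note on the parallel conversions (illustrative): each rank of an MPI matrix stores a
   diagonal block mat->A and an off-diagonal block mat->B with compacted columns, and
   mat->garray maps a compact column index of B back to its global column. The triples
   therefore use rstart + ajj[j] for A-part entries and garray[bjj[j]] for B-part entries
   to recover global column indices.
*/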
PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt          rstart,nz,i,j,jj,irow,countA,countB;
  PetscInt          *row,*col;
  const PetscScalar *av, *bv,*v1,*v2;
  PetscScalar       *val;
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ        *aa  = (Mat_SeqAIJ*)(mat->A)->data;
  Mat_SeqAIJ        *bb  = (Mat_SeqAIJ*)(mat->B)->data;

  PetscFunctionBegin;
  rstart = A->rmap->rstart;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;
  av = aa->a;
  bv = bb->a;

  garray = mat->garray;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz + bb->nz;
    *nnz = nz;
    ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr);
    col  = row + nz;
    val  = (PetscScalar*)(col + nz);

    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j=0; j < countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ*)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
  const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj;
  const PetscInt    *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart;
  const PetscInt    bs2=mat->bs2;
  PetscErrorCode    ierr;
  PetscInt          bs,nz,i,j,k,n,jj,irow,countA,countB,idx;
  PetscInt          *row,*col;
  const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2;
  PetscScalar       *val;

  PetscFunctionBegin;
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = bs2*(aa->nz + bb->nz);
    *nnz = nz;
    ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr);
    col  = row + nz;
    val  = (PetscScalar*)(col + nz);

    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }

  jj = 0; irow = rstart;
  for (i=0; i<mbs; i++) {
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    ajj    = aj + ai[i];
    bjj    = bj + bi[i];
    v1     = av + bs2*ai[i];
    v2     = bv + bs2*bi[i];

    idx = 0;
    /* A-part */
    for (k=0; k<countA; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            row[jj] = irow + n + shift;
            col[jj] = rstart + bs*ajj[k] + j + shift;
          }
          val[jj++] = v1[idx++];
        }
      }
    }

    idx = 0;
    /* B-part */
    for (k=0; k<countB; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            row[jj] = irow + n + shift;
            col[jj] = bs*garray[bjj[k]] + j + shift;
          }
          val[jj++] = v2[idx++];
        }
      }
    }
    irow += bs;
  }
  PetscFunctionReturn(0);
}
PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscInt    *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt          rstart,nz,nza,nzb,i,j,jj,irow,countA,countB;
  PetscInt          *row,*col;
  const PetscScalar *av, *bv,*v1,*v2;
  PetscScalar       *val;
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ        *aa  = (Mat_SeqAIJ*)(mat->A)->data;
  Mat_SeqAIJ        *bb  = (Mat_SeqAIJ*)(mat->B)->data;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
  if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  ai     = aa->i;
  aj     = aa->j;
  adiag  = aa->diag;
  bi     = bb->i;
  bj     = bb->j;
  garray = mat->garray;
  av     = aa->a;
  bv     = bb->a;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
    nzb = 0; /* num of upper triangular entries in mat->B */
    for (i=0; i<m; i++) {
      nza   += (ai[i+1] - adiag[i]);
      countB = bi[i+1] - bi[i];
      bjj    = bj + bi[i];
      for (j=0; j<countB; j++) {
        if (garray[bjj[j]] > rstart) nzb++;
      }
    }

    nz   = nza + nzb; /* total nz of the upper triangular part of mat */
    *nnz = nz;
    ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr);
    col  = row + nz;
    val  = (PetscScalar*)(col + nz);

    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
    v1     = av + adiag[i];
    countA = ai[i+1] - adiag[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j=0; j < countB; j++) {
      if (garray[bjj[j]] > rstart) {
        if (reuse == MAT_INITIAL_MATRIX) {
          row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
        }
        val[jj++] = v2[j];
      }
    }
    irow++;
  }
  PetscFunctionReturn(0);
}
PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
  ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr);
  ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr);
  ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr);
  ierr = PetscFree(mumps->irn);CHKERRQ(ierr);
  ierr = PetscFree(mumps->info);CHKERRQ(ierr);
  ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr);
  mumps->id.job = JOB_END;
  PetscMUMPS_c(mumps);
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  if (mumps->use_petsc_omp_support) { ierr = PetscOmpCtrlDestroy(&mumps->omp_ctrl);CHKERRQ(ierr); }
#endif
  ierr = PetscFree2(mumps->recvcount,mumps->displs);CHKERRQ(ierr);
  ierr = PetscFree(A->data);CHKERRQ(ierr);

  /* clear composed functions */
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS        *mumps=(Mat_MUMPS*)A->data;
  PetscScalar      *array;
  Vec              b_seq;
  IS               is_iden,is_petsc;
  PetscErrorCode   ierr;
  PetscInt         i;
  PetscBool        second_solve = PETSC_FALSE;
  static PetscBool cite1 = PETSC_FALSE,cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1);CHKERRQ(ierr);
  ierr = PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",&cite2);CHKERRQ(ierr);

  if (A->factorerrortype) {
    ierr = PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    ierr = VecSetInf(x);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  mumps->id.ICNTL(20) = 0; /* dense RHS */
  mumps->id.nrhs      = 1;
  b_seq = mumps->b_seq;
  if (mumps->petsc_size > 1) {
    /* MUMPS only supports a centralized rhs. Scatter b into a sequential rhs vector */
    ierr = VecScatterBegin(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mumps->myid) {ierr = VecGetArray(b_seq,&array);CHKERRQ(ierr);}
  } else { /* petsc_size == 1 */
    ierr = VecCopy(b,x);CHKERRQ(ierr);
    ierr = VecGetArray(x,&array);CHKERRQ(ierr);
  }
  if (!mumps->myid) { /* define the rhs on the host */
    mumps->id.nrhs = 1;
    mumps->id.rhs  = (MumpsScalar*)array;
  }
  /*
     handle condensation step of Schur complement (if any)
     We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
     According to the MUMPS (5.0.0) manual, any value should be harmful during the factorization phase.
     Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
     This requires an extra call to PetscMUMPS_c and the computation of the factors for S.
  */
  if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
    if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n");
    second_solve = PETSC_TRUE;
    ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
  }
  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

  /* handle expansion step of Schur complement (if any) */
  if (second_solve) {
    ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
  }

  if (mumps->petsc_size > 1) { /* convert the MUMPS distributed solution to the PETSc mpi x */
    if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
      /* when id.ICNTL(9) changes, the contents of isol_loc may change (though not its size, lsol_loc), so recreate scat_sol */
      ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
    }
    if (!mumps->scat_sol) { /* create scatter scat_sol */
      ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */
      for (i=0; i<mumps->id.lsol_loc; i++) {
        mumps->id.isol_loc[i] -= 1; /* change Fortran style to C style */
      }
      ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,mumps->id.isol_loc,PETSC_COPY_VALUES,&is_petsc);CHKERRQ(ierr); /* to */
      ierr = VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr);
      ierr = ISDestroy(&is_iden);CHKERRQ(ierr);
      ierr = ISDestroy(&is_petsc);CHKERRQ(ierr);

      mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save the current value of id.ICNTL(9) */
    }

    ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }

  if (mumps->petsc_size > 1) {if (!mumps->myid) {ierr = VecRestoreArray(b_seq,&array);CHKERRQ(ierr);}}
  else {ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);}

  ierr = PetscLogFlops(2.0*mumps->id.RINFO(3));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mumps->id.ICNTL(9) = 0;
  ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr);
  mumps->id.ICNTL(9) = 1;
  PetscFunctionReturn(0);
}
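/*
   Usage sketch (illustrative, assuming a KSP/PC set up elsewhere): users normally reach
   the solvers in this file through the factored-matrix interface, e.g.

     ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCLU);CHKERRQ(ierr);
     ierr = PCFactorSetMatSolverType(pc,MATSOLVERMUMPS);CHKERRQ(ierr);

   after which KSPSolve() ends up calling MatSolve_MUMPS() above.
*/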
PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X)
{
  PetscErrorCode    ierr;
  Mat               Bt = NULL;
  PetscBool         flg, flgT;
  Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
  PetscInt          i,nrhs,M;
  PetscScalar       *array;
  const PetscScalar *rbray;
  PetscInt          lsol_loc,nlsol_loc,*isol_loc,*idxx,*isol_loc_save,iidx = 0;
  PetscScalar       *bray,*sol_loc,*sol_loc_save;
  IS                is_to,is_from;
  PetscInt          k,proc,j,m,myrstart;
  const PetscInt    *rstart;
  Vec               v_mpi,b_seq,msol_loc;
  VecScatter        scat_rhs,scat_sol;
  PetscScalar       *aa;
  PetscInt          spnr,*ia,*ja;
  Mat_MPIAIJ        *b = NULL;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");

  ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (flg) { /* dense B */
    if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution");
    mumps->id.ICNTL(20) = 0; /* dense RHS */
  } else { /* sparse B */
    ierr = PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);CHKERRQ(ierr);
    if (flgT) { /* input B is the transpose of the actual RHS matrix,
                   because MUMPS requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
      ierr = MatTransposeGetMat(B,&Bt);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix");
    mumps->id.ICNTL(20) = 1; /* sparse RHS */
  }

  ierr = MatGetSize(B,&M,&nrhs);CHKERRQ(ierr);
  mumps->id.nrhs = nrhs;
  mumps->id.lrhs = M;
  mumps->id.rhs  = NULL;

  if (mumps->petsc_size == 1) {
    PetscScalar *aa;
    PetscInt    spnr,*ia,*ja;
    PetscBool   second_solve = PETSC_FALSE;

    ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
    mumps->id.rhs = (MumpsScalar*)array;

    if (!Bt) { /* dense B */
      /* copy B to X */
      ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr);
      ierr = PetscArraycpy(array,rbray,M*nrhs);CHKERRQ(ierr);
      ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr);
    } else { /* sparse B */
      ierr = MatSeqAIJGetArray(Bt,&aa);CHKERRQ(ierr);
      ierr = MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      /* MUMPS requires ia and ja to start at 1! */
      mumps->id.irhs_ptr    = ia;
      mumps->id.irhs_sparse = ja;
      mumps->id.nz_rhs      = ia[spnr] - 1;
      mumps->id.rhs_sparse  = (MumpsScalar*)aa;
    }
    /* handle condensation step of Schur complement (if any) */
    if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
      second_solve = PETSC_TRUE;
      ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
    }
    /* solve phase */
    /*-------------*/
    mumps->id.job = JOB_SOLVE;
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

    /* handle expansion step of Schur complement (if any) */
    if (second_solve) {
      ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
    }
    if (Bt) { /* sparse B */
      ierr = MatSeqAIJRestoreArray(Bt,&aa);CHKERRQ(ierr);
      ierr = MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
    ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  /*--------- parallel case: MUMPS requires the rhs B to be centralized on the host! --------*/
  if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n");

  /* create msol_loc to hold the MUMPS local solution */
  isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
  sol_loc_save  = (PetscScalar*)mumps->id.sol_loc;

  lsol_loc  = mumps->id.lsol_loc;
  nlsol_loc = nrhs*lsol_loc; /* length of sol_loc */
  ierr = PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);CHKERRQ(ierr);
  mumps->id.sol_loc  = (MumpsScalar*)sol_loc;
  mumps->id.isol_loc = isol_loc;

  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);CHKERRQ(ierr);

  if (!Bt) { /* dense B */
    /* TODO: Because of the non-contiguous indices, the vecscatter scat_rhs created below does not reduce to an
       MPI_Gather, resulting in very inefficient communication. An optimization is to use VecScatterCreateToZero()
       to gather B on rank 0, then re-arrange B there into the desired order, which is a local operation.
    */

    /* wrap the dense rhs matrix B into a vector v_mpi */
    ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr);
    ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr);
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr);

    /* scatter v_mpi to b_seq in proc[0]. MUMPS requires the rhs to be centralized on the host! */
    if (!mumps->myid) {
      PetscInt *idx;
      /* idx: maps the k-th index of v_mpi to the (i,j)-th global entry of B */
      ierr = PetscMalloc1(nrhs*M,&idx);CHKERRQ(ierr);
      ierr = MatGetOwnershipRanges(B,&rstart);CHKERRQ(ierr);
      k = 0;
      for (proc=0; proc<mumps->petsc_size; proc++){
        for (j=0; j<nrhs; j++){
          for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i;
        }
      }

      ierr = VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);CHKERRQ(ierr);
      ierr = ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);CHKERRQ(ierr);
    } else {
      ierr = VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);CHKERRQ(ierr);
    }
    ierr = VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);CHKERRQ(ierr);
    ierr = VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = ISDestroy(&is_to);CHKERRQ(ierr);
    ierr = ISDestroy(&is_from);CHKERRQ(ierr);
    ierr = VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

    if (!mumps->myid) { /* define the rhs on the host */
      ierr = VecGetArray(b_seq,&bray);CHKERRQ(ierr);
      mumps->id.rhs = (MumpsScalar*)bray;
      ierr = VecRestoreArray(b_seq,&bray);CHKERRQ(ierr);
    }

  } else { /* sparse B */
    b = (Mat_MPIAIJ*)Bt->data;

    /* wrap dense X into a vector v_mpi */
    ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr);
    ierr = MatDenseGetArray(X,&bray);CHKERRQ(ierr);
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(X,&bray);CHKERRQ(ierr);

    if (!mumps->myid) {
      ierr = MatSeqAIJGetArray(b->A,&aa);CHKERRQ(ierr);
      ierr = MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      /* MUMPS requires ia and ja to start at 1! */
      mumps->id.irhs_ptr    = ia;
      mumps->id.irhs_sparse = ja;
      mumps->id.nz_rhs      = ia[spnr] - 1;
      mumps->id.rhs_sparse  = (MumpsScalar*)aa;
    } else {
      mumps->id.irhs_ptr    = NULL;
      mumps->id.irhs_sparse = NULL;
      mumps->id.nz_rhs      = 0;
      mumps->id.rhs_sparse  = NULL;
    }
  }

  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

  /* scatter the MUMPS distributed solution to the PETSc vector v_mpi, which shares local arrays with the solution matrix X */
  ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
  ierr = VecPlaceArray(v_mpi,array);CHKERRQ(ierr);

  /* create scatter scat_sol */
  ierr = MatGetOwnershipRanges(X,&rstart);CHKERRQ(ierr);
  /* iidx: index for scattering the MUMPS solution to PETSc X */
  ierr = ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);CHKERRQ(ierr);
  ierr = PetscMalloc1(nlsol_loc,&idxx);CHKERRQ(ierr);
  for (i=0; i<lsol_loc; i++) {
    isol_loc[i] -= 1; /* change Fortran style to C style; sol_loc[i+j*lsol_loc] contains the entry x[isol_loc[i]] of the j-th solution vector */

    for (proc=0; proc<mumps->petsc_size; proc++){
      if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) {
        myrstart = rstart[proc];
        k        = isol_loc[i] - myrstart;        /* local index on the 1st column of the PETSc vector X */
        iidx     = k + myrstart*nrhs;             /* maps MUMPS isol_loc[i] to a PETSc index in X */
        m        = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */
        break;
      }
    }

    for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m;
  }
  ierr = ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr);
  ierr = VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);CHKERRQ(ierr);
  ierr = VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = ISDestroy(&is_from);CHKERRQ(ierr);
  ierr = ISDestroy(&is_to);CHKERRQ(ierr);
  ierr = VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);

  /* free spaces */
  mumps->id.sol_loc  = (MumpsScalar*)sol_loc_save;
  mumps->id.isol_loc = isol_loc_save;

  ierr = PetscFree2(sol_loc,isol_loc);CHKERRQ(ierr);
  ierr = PetscFree(idxx);CHKERRQ(ierr);
  ierr = VecDestroy(&msol_loc);CHKERRQ(ierr);
  ierr = VecDestroy(&v_mpi);CHKERRQ(ierr);
  if (Bt) {
    if (!mumps->myid) {
      b = (Mat_MPIAIJ*)Bt->data;
      ierr = MatSeqAIJRestoreArray(b->A,&aa);CHKERRQ(ierr);
      ierr = MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
  } else {
    ierr = VecDestroy(&b_seq);CHKERRQ(ierr);
    ierr = VecScatterDestroy(&scat_rhs);CHKERRQ(ierr);
  }
  ierr = VecScatterDestroy(&scat_sol);CHKERRQ(ierr);
  ierr = PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
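/*
   Note (illustrative): MUMPS wants a sparse right-hand side in compressed sparse COLUMN (CSC)
   format. The CSR (AIJ) row pointers of Bt are exactly the CSC column pointers of B = Bt^T,
   which is why MatMatSolve_MUMPS() above can pass Bt's ia/ja arrays as irhs_ptr/irhs_sparse,
   and why MatMatTransposeSolve_MUMPS() below only needs to wrap Bt in a MATTRANSPOSEMAT.
*/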
PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X)
{
  PetscErrorCode ierr;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix");

  /* Create B=Bt^T that uses Bt's data structure */
  ierr = MatCreateTranspose(Bt,&B);CHKERRQ(ierr);

  ierr = MatMatSolve_MUMPS(A,B,X);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if !defined(PETSC_USE_COMPLEX)
/*
  input:
   F:     numeric factor
  output:
   nneg:  total number of negative pivots
   nzero: total number of zero pivots
   npos:  (global dimension of F) - nneg - nzero
*/
PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos)
{
  Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRQ(ierr);
  /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
  if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",mumps->id.ICNTL(13));

  if (nneg) *nneg = mumps->id.INFOG(12);
  if (nzero || npos) {
    if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
    if (nzero) *nzero = mumps->id.INFOG(28);
    if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
  }
  PetscFunctionReturn(0);
}
#endif
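/*
   Worked example (illustrative) of the gather plan built in MatMumpsGatherNonzerosOnMaster()
   below: with omp_comm_size = 3 and per-rank nonzero counts mpinz = {5,3,4}, the master
   computes recvcount = {5,3,4} and displs = {0,5,8}, and receives nz = 12 triples in total.
*/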
PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps)
{
  PetscErrorCode ierr;
  PetscInt       i,nz=0,*irn,*jcn=0;
  PetscScalar    *val=0;
  PetscMPIInt    mpinz,*recvcount=NULL,*displs=NULL;

  PetscFunctionBegin;
  if (mumps->omp_comm_size > 1) {
    if (reuse == MAT_INITIAL_MATRIX) {
      /* master first gathers counts of nonzeros to receive */
      if (mumps->is_omp_master) { ierr = PetscMalloc2(mumps->omp_comm_size,&recvcount,mumps->omp_comm_size,&displs);CHKERRQ(ierr); }
      ierr = PetscMPIIntCast(mumps->nz,&mpinz);CHKERRQ(ierr);
      ierr = MPI_Gather(&mpinz,1,MPI_INT,recvcount,1,MPI_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr);

      /* master allocates memory to receive nonzeros */
      if (mumps->is_omp_master) {
        displs[0] = 0;
        for (i=1; i<mumps->omp_comm_size; i++) displs[i] = displs[i-1] + recvcount[i-1];
        nz   = displs[mumps->omp_comm_size-1] + recvcount[mumps->omp_comm_size-1];
        ierr = PetscMalloc(2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar),&irn);CHKERRQ(ierr);
        jcn  = irn + nz;
        val  = (PetscScalar*)(jcn + nz);
      }

      /* save the gatherv plan */
      mumps->mpinz     = mpinz; /* used as the send count */
      mumps->recvcount = recvcount;
      mumps->displs    = displs;

      /* master gathers the nonzeros */
      ierr = MPI_Gatherv(mumps->irn,mpinz,MPIU_INT,irn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr);
      ierr = MPI_Gatherv(mumps->jcn,mpinz,MPIU_INT,jcn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr);
      ierr = MPI_Gatherv(mumps->val,mpinz,MPIU_SCALAR,val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr);

      /* master frees its row/col/val and replaces them with the bigger arrays */
      if (mumps->is_omp_master) {
        ierr = PetscFree(mumps->irn);CHKERRQ(ierr); /* irn/jcn/val are allocated together, so free only irn */
        mumps->nz  = nz; /* it is the sum of mpinz over omp_comm */
        mumps->irn = irn;
        mumps->jcn = jcn;
        mumps->val = val;
      }
    } else {
      ierr = MPI_Gatherv((mumps->is_omp_master?MPI_IN_PLACE:mumps->val),mumps->mpinz,MPIU_SCALAR,mumps->val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat_MUMPS      *mumps =(Mat_MUMPS*)(F)->data;
  PetscErrorCode ierr;
  PetscBool      isMPIAIJ;

  PetscFunctionBegin;
  if (mumps->id.INFOG(1) < 0) {
    if (mumps->id.INFOG(1) == -6) {
      ierr = PetscInfo2(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    }
    ierr = PetscInfo2(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr);
  ierr = MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);CHKERRQ(ierr);

  /* numerical factorization phase */
  /*-------------------------------*/
  mumps->id.job = JOB_FACTNUMERIC;
  if (!mumps->id.ICNTL(18)) { /* A is centralized */
    if (!mumps->myid) {
      mumps->id.a = (MumpsScalar*)mumps->val;
    }
  } else {
    mumps->id.a_loc = (MumpsScalar*)mumps->val;
  }
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) {
    if (A->erroriffailure) {
      SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
    } else {
      if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
        ierr = PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
      } else if (mumps->id.INFOG(1) == -13) {
        ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_OUTMEMORY;
      } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
        ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with work array\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_OUTMEMORY;
      } else {
        ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_OTHER;
      }
    }
  }
  if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d\n",mumps->id.ICNTL(16));

  F->assembled    = PETSC_TRUE;
  mumps->matstruc = SAME_NONZERO_PATTERN;
  if (F->schur) { /* reset the Schur status to unfactored */
#if defined(PETSC_HAVE_CUDA)
    F->schur->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
#endif
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
      mumps->id.ICNTL(19) = 2;
      ierr = MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);CHKERRQ(ierr);
    }
    ierr = MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);CHKERRQ(ierr);
  }

  /* just to be sure that the ICNTL(19) value returned by a call to MatMumpsGetIcntl is always consistent */
  if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;

  if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
  if (mumps->petsc_size > 1) {
    PetscInt    lsol_loc;
    PetscScalar *sol_loc;

    ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr);

    /* distributed solution; create x_seq=sol_loc for repeated use */
    if (mumps->x_seq) {
      ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
      ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr);
      ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr);
    }
    lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
    ierr = PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);CHKERRQ(ierr);
    mumps->id.lsol_loc = lsol_loc;
    mumps->id.sol_loc  = (MumpsScalar*)sol_loc;
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr);
  }
  ierr = PetscLogFlops(mumps->id.RINFO(2));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
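/*
   Usage sketch (illustrative): the options read in PetscSetMUMPSFromOptions() below are
   given on the command line, e.g.

     ./app -pc_type cholesky -pc_factor_mat_solver_type mumps \
           -mat_mumps_icntl_24 1 -mat_mumps_cntl_3 1e-12

   which enables null pivot detection with an absolute threshold of 1e-12 (ICNTL(13) is then
   set to 1 automatically below, keeping ScaLAPACK off the root node).
*/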
/* Sets MUMPS options from the options database */
PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A)
{
  Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
  PetscErrorCode ierr;
  PetscInt       icntl,info[80],i,ninfo=80;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr);
  if (flg) mumps->id.ICNTL(1) = icntl;
  ierr = PetscOptionsInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warnings","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr);
  if (flg) mumps->id.ICNTL(2) = icntl;
  ierr = PetscOptionsInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr);
  if (flg) mumps->id.ICNTL(3) = icntl;

  ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr);
  if (flg) mumps->id.ICNTL(4) = icntl;
  if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* restore the MUMPS default id.ICNTL(3) = 6 */

  ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): permutes the matrix to a zero-free diagonal and/or scales it (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr);
  if (flg) mumps->id.ICNTL(6) = icntl;
  ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr);
  if (flg) {
    if (icntl == 1 && mumps->petsc_size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"a pivot order set by the user in PERM_IN is not supported by the PETSc/MUMPS interface\n");
    else mumps->id.ICNTL(7) = icntl;
  }

  ierr = PetscOptionsInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr);
  /* ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL);CHKERRQ(ierr);  handled by MatSolveTranspose_MUMPS() */
  ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr);
  if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any Schur data (if any) */
    ierr = MatDestroy(&F->schur);CHKERRQ(ierr);
    ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr);
  }
  /* ierr = PetscOptionsInt("-mat_mumps_icntl_20","ICNTL(20): the format (dense or sparse) of the right-hand sides","None",mumps->id.ICNTL(20),&mumps->id.ICNTL(20),NULL);CHKERRQ(ierr);  -- sparse rhs is not supported in PETSc API */
  /* ierr = PetscOptionsInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL);CHKERRQ(ierr);  we only use distributed solution vectors */

  ierr = PetscOptionsInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (in MB) that MUMPS can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr);
  if (mumps->id.ICNTL(24)) {
    mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */
  }

  ierr = PetscOptionsInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr);
matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr); 1451 ierr = PetscOptionsInt("-mat_mumps_icntl_27","ICNTL(27): the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr); 1452 ierr = PetscOptionsInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr); 1453 ierr = PetscOptionsInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr); 1454 /* ierr = PetscOptionsInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */ 1455 ierr = PetscOptionsInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr); 1456 /* ierr = PetscOptionsInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr); -- not supported by PETSc API */ 1457 ierr = PetscOptionsInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr); 1458 ierr = PetscOptionsInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr); 1459 ierr = PetscOptionsInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);CHKERRQ(ierr); 1460 ierr = PetscOptionsInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);CHKERRQ(ierr); 1461 1462 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr); 1463 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr); 1464 ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr); 1465 ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr); 1466 ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr); 1467 ierr = PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);CHKERRQ(ierr); 1468 1469 ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, 256, NULL);CHKERRQ(ierr); 1470 1471 ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr); 1472 if (ninfo) { 1473 if (ninfo > 80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %d must <= 80\n",ninfo); 1474 ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr); 1475 mumps->ninfo = ninfo; 1476 for (i=0; i<ninfo; i++) { 1477 if (info[i] < 0 || info[i]>80) 
1471 ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr); 1472 if (ninfo) { 1473 if (ninfo > 80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO entries %d must be <= 80\n",ninfo); 1474 ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr); 1475 mumps->ninfo = ninfo; 1476 for (i=0; i<ninfo; i++) { 1477 if (info[i] < 1 || info[i] > 80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %d must be between 1 and 80\n",info[i]); 1478 else mumps->info[i] = info[i]; 1479 } 1480 } 1481 1482 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1483 PetscFunctionReturn(0); 1484 } 1485 1486 PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps) 1487 { 1488 PetscErrorCode ierr; 1489 PetscInt nthreads=0; 1490 1491 PetscFunctionBegin; 1492 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1493 ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRQ(ierr); 1494 ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRQ(ierr); /* so that code like "if (!myid)" still works even if mumps_comm is different */ 1495 1496 ierr = PetscOptionsHasName(NULL,NULL,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr); 1497 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 lets PetscOmpCtrlCreate() guess a proper value when the user did not supply one */ 1498 ierr = PetscOptionsGetInt(NULL,NULL,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr); 1499 if (mumps->use_petsc_omp_support) { 1500 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1501 ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr); 1502 ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr); 1503 #else 1504 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -mat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see details in the MATSOLVERMUMPS manual page\n"); 1505 #endif 1506 } else { 1507 mumps->omp_comm = PETSC_COMM_SELF; 1508 mumps->mumps_comm = mumps->petsc_comm; 1509 mumps->is_omp_master = PETSC_TRUE; 1510 } 1511 ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRQ(ierr); 1512 1513 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 1514 mumps->id.job = JOB_INIT; 1515 mumps->id.par = 1; /* the host participates in factorization and solve */ 1516 mumps->id.sym = mumps->sym; 1517 1518 PetscMUMPS_c(mumps); 1519 1520 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 1521 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
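The broadcast lengths used below follow the MUMPS 5.x C interface, in which ICNTL has 60 entries and CNTL has 15 (see the MUMPS users' guide); they must be kept in sync if a future MUMPS release resizes these arrays.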
1522 */ 1523 ierr = MPI_Bcast(mumps->id.icntl,60,MPIU_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 Manual Section 9 */ 1524 ierr = MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); 1525 1526 mumps->scat_rhs = NULL; 1527 mumps->scat_sol = NULL; 1528 1529 /* set PETSc-MUMPS default options - override MUMPS defaults */ 1530 mumps->id.ICNTL(3) = 0; 1531 mumps->id.ICNTL(4) = 0; 1532 if (mumps->petsc_size == 1) { 1533 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 1534 } else { 1535 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 1536 mumps->id.ICNTL(20) = 0; /* rhs is in dense format */ 1537 mumps->id.ICNTL(21) = 1; /* distributed solution */ 1538 } 1539 1540 /* schur */ 1541 mumps->id.size_schur = 0; 1542 mumps->id.listvar_schur = NULL; 1543 mumps->id.schur = NULL; 1544 mumps->sizeredrhs = 0; 1545 mumps->schur_sol = NULL; 1546 mumps->schur_sizesol = 0; 1547 PetscFunctionReturn(0); 1548 } 1549 1550 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps) 1551 { 1552 PetscErrorCode ierr; 1553 1554 PetscFunctionBegin; 1555 ierr = MPI_Bcast(mumps->id.infog, 80,MPIU_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 manual p82 */ 1556 ierr = MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); 1557 if (mumps->id.INFOG(1) < 0) { 1558 if (A->erroriffailure) { 1559 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1560 } else { 1561 if (mumps->id.INFOG(1) == -6) { 1562 ierr = PetscInfo2(F,"matrix is structurally singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1563 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 1564 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 1565 ierr = PetscInfo2(F,"workspace problem, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1566 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1567 } else { 1568 ierr = PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1569 F->factorerrortype = MAT_FACTOR_OTHER; 1570 } 1571 } 1572 } 1573 PetscFunctionReturn(0); 1574 } 1575 1576 /* Note the Petsc r(=c) permutation is used when mumps->id.ICNTL(7)==1 with centralized assembled matrix input; otherwise r and c are ignored */ 1577 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1578 { 1579 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1580 PetscErrorCode ierr; 1581 Vec b; 1582 const PetscInt M = A->rmap->N; 1583 1584 PetscFunctionBegin; 1585 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1586 1587 /* Set MUMPS options from the options database */ 1588 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1589 1590 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1591 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1592 1593 /* analysis phase */ 1594 /*----------------*/ 1595 mumps->id.job = JOB_FACTSYMBOLIC; 1596 mumps->id.n = M; 1597 switch (mumps->id.ICNTL(18)) { 1598 case 0: /* centralized assembled matrix input */ 1599 if (!mumps->myid) { 1600 mumps->id.nz = mumps->nz; mumps->id.irn = mumps->irn; mumps->id.jcn = mumps->jcn; 1601 if (mumps->id.ICNTL(6)>1) { 1602 mumps->id.a = (MumpsScalar*)mumps->val; 1603 } 1604 if (mumps->id.ICNTL(7) == 1) { /* use user-provided
matrix ordering - assuming r = c ordering */ 1605 /* 1606 PetscBool flag; 1607 ierr = ISEqual(r,c,&flag);CHKERRQ(ierr); 1608 if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"row_perm != col_perm"); 1609 ierr = ISView(r,PETSC_VIEWER_STDOUT_SELF); 1610 */ 1611 if (!mumps->myid) { 1612 const PetscInt *idx; 1613 PetscInt i,*perm_in; 1614 1615 ierr = PetscMalloc1(M,&perm_in);CHKERRQ(ierr); 1616 ierr = ISGetIndices(r,&idx);CHKERRQ(ierr); 1617 1618 mumps->id.perm_in = perm_in; 1619 for (i=0; i<M; i++) perm_in[i] = idx[i]+1; /* perm_in[]: start from 1, not 0! */ 1620 ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr); 1621 } 1622 } 1623 } 1624 break; 1625 case 3: /* distributed assembled matrix input (size>1) */ 1626 mumps->id.nz_loc = mumps->nz; 1627 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1628 if (mumps->id.ICNTL(6)>1) { 1629 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1630 } 1631 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1632 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1633 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1634 ierr = VecDestroy(&b);CHKERRQ(ierr); 1635 break; 1636 } 1637 PetscMUMPS_c(mumps); 1638 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1639 1640 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1641 F->ops->solve = MatSolve_MUMPS; 1642 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1643 F->ops->matsolve = MatMatSolve_MUMPS; 1644 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1645 PetscFunctionReturn(0); 1646 } 1647 1648 /* Note the Petsc r and c permutations are ignored */ 1649 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1650 { 1651 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1652 PetscErrorCode ierr; 1653 Vec b; 1654 const PetscInt M = A->rmap->N; 1655 1656 PetscFunctionBegin; 1657 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1658 1659 /* Set MUMPS options from the options database */ 1660 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1661 1662 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1663 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1664 1665 /* analysis phase */ 1666 /*----------------*/ 1667 mumps->id.job = JOB_FACTSYMBOLIC; 1668 mumps->id.n = M; 1669 switch (mumps->id.ICNTL(18)) { 1670 case 0: /* centralized assembled matrix input */ 1671 if (!mumps->myid) { 1672 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1673 if (mumps->id.ICNTL(6)>1) { 1674 mumps->id.a = (MumpsScalar*)mumps->val; 1675 } 1676 } 1677 break; 1678 case 3: /* distributed assembled matrix input (size>1) */ 1679 mumps->id.nz_loc = mumps->nz; 1680 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1681 if (mumps->id.ICNTL(6)>1) { 1682 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1683 } 1684 /* MUMPS only supports centralized rhs. 
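(With ICNTL(20)=0 the right-hand side is dense and must reside on the host, so the distributed PETSc vector is gathered onto rank 0 at each solve.)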
Create scatter scat_rhs for repeated use in MatSolve() */ 1685 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1686 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1687 ierr = VecDestroy(&b);CHKERRQ(ierr); 1688 break; 1689 } 1690 PetscMUMPS_c(mumps); 1691 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1692 1693 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1694 F->ops->solve = MatSolve_MUMPS; 1695 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1696 PetscFunctionReturn(0); 1697 } 1698 1699 /* Note the Petsc r permutation and factor info are ignored */ 1700 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info) 1701 { 1702 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1703 PetscErrorCode ierr; 1704 Vec b; 1705 const PetscInt M = A->rmap->N; 1706 1707 PetscFunctionBegin; 1708 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1709 1710 /* Set MUMPS options from the options database */ 1711 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1712 1713 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1714 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1715 1716 /* analysis phase */ 1717 /*----------------*/ 1718 mumps->id.job = JOB_FACTSYMBOLIC; 1719 mumps->id.n = M; 1720 switch (mumps->id.ICNTL(18)) { 1721 case 0: /* centralized assembled matrix input */ 1722 if (!mumps->myid) { 1723 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1724 if (mumps->id.ICNTL(6)>1) { 1725 mumps->id.a = (MumpsScalar*)mumps->val; 1726 } 1727 } 1728 break; 1729 case 3: /* distributed assembled matrix input (size>1) */ 1730 mumps->id.nz_loc = mumps->nz; 1731 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1732 if (mumps->id.ICNTL(6)>1) { 1733 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1734 } 1735 /* MUMPS only supports centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 1736 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1737 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1738 ierr = VecDestroy(&b);CHKERRQ(ierr); 1739 break; 1740 } 1741 PetscMUMPS_c(mumps); 1742 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1743 1744 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 1745 F->ops->solve = MatSolve_MUMPS; 1746 F->ops->solvetranspose = MatSolve_MUMPS; 1747 F->ops->matsolve = MatMatSolve_MUMPS; 1748 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1749 #if defined(PETSC_USE_COMPLEX) 1750 F->ops->getinertia = NULL; 1751 #else 1752 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 1753 #endif 1754 PetscFunctionReturn(0); 1755 } 1756 1757 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) 1758 { 1759 PetscErrorCode ierr; 1760 PetscBool iascii; 1761 PetscViewerFormat format; 1762 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1763 1764 PetscFunctionBegin; 1765 /* check if matrix is mumps type */ 1766 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0); 1767 1768 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1769 if (iascii) { 1770 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1771 if (format == PETSC_VIEWER_ASCII_INFO) { 1772 ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr); 1773 ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",mumps->id.sym);CHKERRQ(ierr); 1774 ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",mumps->id.par);CHKERRQ(ierr); 1775 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d \n",mumps->id.ICNTL(1));CHKERRQ(ierr); 1776 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d \n",mumps->id.ICNTL(2));CHKERRQ(ierr); 1777 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d \n",mumps->id.ICNTL(3));CHKERRQ(ierr); 1778 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",mumps->id.ICNTL(4));CHKERRQ(ierr); 1779 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",mumps->id.ICNTL(5));CHKERRQ(ierr); 1780 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",mumps->id.ICNTL(6));CHKERRQ(ierr); 1781 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d \n",mumps->id.ICNTL(7));CHKERRQ(ierr); 1782 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d \n",mumps->id.ICNTL(8));CHKERRQ(ierr); 1783 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",mumps->id.ICNTL(10));CHKERRQ(ierr); 1784 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",mumps->id.ICNTL(11));CHKERRQ(ierr); 1785 if (mumps->id.ICNTL(11)>0) { 1786 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr); 1787 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr); 1788 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6));CHKERRQ(ierr); 1789 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr); 1790 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr); 1791 ierr = PetscViewerASCIIPrintf(viewer," 
RINFOG(10),RINFOG(11) (condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr); 1792 } 1793 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",mumps->id.ICNTL(12));CHKERRQ(ierr); 1794 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (efficiency control): %d \n",mumps->id.ICNTL(13));CHKERRQ(ierr); 1795 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",mumps->id.ICNTL(14));CHKERRQ(ierr); 1796 /* ICNTL(15-17) not used */ 1797 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",mumps->id.ICNTL(18));CHKERRQ(ierr); 1798 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d \n",mumps->id.ICNTL(19));CHKERRQ(ierr); 1799 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(20) (rhs sparse pattern): %d \n",mumps->id.ICNTL(20));CHKERRQ(ierr); 1800 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d \n",mumps->id.ICNTL(21));CHKERRQ(ierr); 1801 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d \n",mumps->id.ICNTL(22));CHKERRQ(ierr); 1802 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory that can be allocated locally): %d \n",mumps->id.ICNTL(23));CHKERRQ(ierr); 1803 1804 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d \n",mumps->id.ICNTL(24));CHKERRQ(ierr); 1805 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d \n",mumps->id.ICNTL(25));CHKERRQ(ierr); 1806 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for rhs or solution): %d \n",mumps->id.ICNTL(26));CHKERRQ(ierr); 1807 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(27) (blocking size for multiple right-hand sides): %d \n",mumps->id.ICNTL(27));CHKERRQ(ierr); 1808 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d \n",mumps->id.ICNTL(28));CHKERRQ(ierr); 1809 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d \n",mumps->id.ICNTL(29));CHKERRQ(ierr); 1810 1811 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d \n",mumps->id.ICNTL(30));CHKERRQ(ierr); 1812 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(31) (which factors may be discarded during factorization): %d \n",mumps->id.ICNTL(31));CHKERRQ(ierr); 1813 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d \n",mumps->id.ICNTL(33));CHKERRQ(ierr); 1814 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d \n",mumps->id.ICNTL(35));CHKERRQ(ierr); 1815 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(36) (choice of BLR factorization variant): %d \n",mumps->id.ICNTL(36));CHKERRQ(ierr); 1816 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(38) (estimated compression rate of LU factors): %d \n",mumps->id.ICNTL(38));CHKERRQ(ierr); 1817 1818 ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1));CHKERRQ(ierr); 1819 ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr); 1820 ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3));CHKERRQ(ierr); 1821 ierr = PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4));CHKERRQ(ierr); 1822 ierr = PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5));CHKERRQ(ierr); 1823 ierr = PetscViewerASCIIPrintf(viewer," CNTL(7) (dropping parameter for BLR): %g \n",mumps->id.CNTL(7));CHKERRQ(ierr);
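/*
   The fields above and below appear in the ASCII info view, e.g. (illustrative command
   line) with -pc_type lu -pc_factor_mat_solver_type mumps -ksp_view. The per-processor
   values that follow use synchronized printing so that each rank's line is emitted in
   rank order.
*/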
1824 1825 /* information local to each processor */ 1826 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr); 1827 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1828 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr); 1829 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1830 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr); 1831 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr); 1832 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1833 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr); 1834 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr); 1835 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1836 1837 ierr = PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size (in MB) of MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr); 1838 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr); 1839 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1840 1841 ierr = PetscViewerASCIIPrintf(viewer, " INFO(16) (size (in MB) of MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr); 1842 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr); 1843 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1844 1845 ierr = PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr); 1846 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr); 1847 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1848 1849 if (mumps->ninfo && mumps->ninfo <= 80) { 1850 PetscInt i; 1851 for (i=0; i<mumps->ninfo; i++) { 1852 ierr = PetscViewerASCIIPrintf(viewer, " INFO(%d): \n",mumps->info[i]);CHKERRQ(ierr); 1853 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(mumps->info[i]));CHKERRQ(ierr); 1854 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1855 } 1856 } 1857 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1858 1859 if (!mumps->myid) { /* information from the host */ 1860 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr); 1861 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr); 1862 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr); 1863 ierr = PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr); 1864 1865 ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(3));CHKERRQ(ierr); 1866 ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis):
%d \n",mumps->id.INFOG(4));CHKERRQ(ierr); 1867 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",mumps->id.INFOG(5));CHKERRQ(ierr); 1868 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",mumps->id.INFOG(6));CHKERRQ(ierr); 1869 ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively use after analysis): %d \n",mumps->id.INFOG(7));CHKERRQ(ierr); 1870 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",mumps->id.INFOG(8));CHKERRQ(ierr); 1871 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",mumps->id.INFOG(9));CHKERRQ(ierr); 1872 ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d \n",mumps->id.INFOG(10));CHKERRQ(ierr); 1873 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d \n",mumps->id.INFOG(11));CHKERRQ(ierr); 1874 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",mumps->id.INFOG(12));CHKERRQ(ierr); 1875 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",mumps->id.INFOG(13));CHKERRQ(ierr); 1876 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",mumps->id.INFOG(14));CHKERRQ(ierr); 1877 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",mumps->id.INFOG(15));CHKERRQ(ierr); 1878 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",mumps->id.INFOG(16));CHKERRQ(ierr); 1879 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",mumps->id.INFOG(17));CHKERRQ(ierr); 1880 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",mumps->id.INFOG(18));CHKERRQ(ierr); 1881 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",mumps->id.INFOG(19));CHKERRQ(ierr); 1882 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",mumps->id.INFOG(20));CHKERRQ(ierr); 1883 ierr = PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",mumps->id.INFOG(21));CHKERRQ(ierr); 1884 ierr = PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",mumps->id.INFOG(22));CHKERRQ(ierr); 1885 ierr = PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",mumps->id.INFOG(23));CHKERRQ(ierr); 1886 ierr = PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",mumps->id.INFOG(24));CHKERRQ(ierr); 1887 ierr = PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",mumps->id.INFOG(25));CHKERRQ(ierr); 1888 ierr = 
PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));CHKERRQ(ierr); 1889 ierr = PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));CHKERRQ(ierr); 1890 ierr = PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));CHKERRQ(ierr); 1891 ierr = PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));CHKERRQ(ierr); 1892 ierr = PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));CHKERRQ(ierr); 1893 ierr = PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));CHKERRQ(ierr); 1894 ierr = PetscViewerASCIIPrintf(viewer," INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));CHKERRQ(ierr); 1895 ierr = PetscViewerASCIIPrintf(viewer," INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(36));CHKERRQ(ierr); 1896 ierr = PetscViewerASCIIPrintf(viewer," INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d \n",mumps->id.INFOG(37));CHKERRQ(ierr); 1897 ierr = PetscViewerASCIIPrintf(viewer," INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(38));CHKERRQ(ierr); 1898 ierr = PetscViewerASCIIPrintf(viewer," INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d \n",mumps->id.INFOG(39));CHKERRQ(ierr); 1899 } 1900 } 1901 } 1902 PetscFunctionReturn(0); 1903 } 1904 1905 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info) 1906 { 1907 Mat_MUMPS *mumps =(Mat_MUMPS*)A->data; 1908 1909 PetscFunctionBegin; 1910 info->block_size = 1.0; 1911 info->nz_allocated = mumps->id.INFOG(20); 1912 info->nz_used = mumps->id.INFOG(20); 1913 info->nz_unneeded = 0.0; 1914 info->assemblies = 0.0; 1915 info->mallocs = 0.0; 1916 info->memory = 0.0; 1917 info->fill_ratio_given = 0; 1918 info->fill_ratio_needed = 0; 1919 info->factor_mallocs = 0; 1920 PetscFunctionReturn(0); 1921 } 1922 1923 /* -------------------------------------------------------------------------------------------*/ 1924 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 1925 { 1926 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1927 const PetscInt *idxs; 1928 PetscInt size,i; 1929 PetscErrorCode ierr; 1930 1931 PetscFunctionBegin; 1932 ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr); 1933 if (mumps->petsc_size > 1) { 1934 PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */ 1935 1936 ls = mumps->myid ? (size ? 
PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 1937 ierr = MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);CHKERRQ(ierr); 1938 if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported by PETSc\n"); 1939 } 1940 if (mumps->id.size_schur != size) { 1941 ierr = PetscFree2(mumps->id.listvar_schur,mumps->id.schur);CHKERRQ(ierr); 1942 mumps->id.size_schur = size; 1943 mumps->id.schur_lld = size; 1944 ierr = PetscMalloc2(size,&mumps->id.listvar_schur,size*size,&mumps->id.schur);CHKERRQ(ierr); 1945 } 1946 1947 /* Schur complement matrix */ 1948 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.size_schur,(PetscScalar*)mumps->id.schur,&F->schur);CHKERRQ(ierr); 1949 if (mumps->sym == 1) { 1950 ierr = MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 1951 } 1952 1953 /* MUMPS expects Fortran style indices */ 1954 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 1955 ierr = PetscArraycpy(mumps->id.listvar_schur,idxs,size);CHKERRQ(ierr); 1956 for (i=0;i<size;i++) mumps->id.listvar_schur[i]++; 1957 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 1958 if (mumps->petsc_size > 1) { 1959 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 1960 } else { 1961 if (F->factortype == MAT_FACTOR_LU) { 1962 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 1963 } else { 1964 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 1965 } 1966 } 1967 /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */ 1968 mumps->id.ICNTL(26) = -1; 1969 PetscFunctionReturn(0); 1970 } 1971 1972 /* -------------------------------------------------------------------------------------------*/ 1973 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S) 1974 { 1975 Mat St; 1976 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1977 PetscScalar *array; 1978 #if defined(PETSC_USE_COMPLEX) 1979 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 1980 #endif 1981 PetscErrorCode ierr; 1982 1983 PetscFunctionBegin; 1984 if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS() to enable it");
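/*
   Typical calling sequence for the Schur complement feature (an illustrative sketch,
   not code exercised here; A and is_schur are the caller's matrix and index set, and
   error checking is abbreviated):

     Mat           F,S;
     MatFactorInfo info;
     ierr = MatFactorInfoInitialize(&info);CHKERRQ(ierr);
     ierr = MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);CHKERRQ(ierr);
     ierr = MatFactorSetSchurIS(F,is_schur);CHKERRQ(ierr);
     ierr = MatLUFactorSymbolic(F,A,NULL,NULL,&info);CHKERRQ(ierr);
     ierr = MatLUFactorNumeric(F,A,&info);CHKERRQ(ierr);
     ierr = MatFactorCreateSchurComplement(F,&S,NULL);CHKERRQ(ierr);
*/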
1985 ierr = MatCreate(PETSC_COMM_SELF,&St);CHKERRQ(ierr); 1986 ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);CHKERRQ(ierr); 1987 ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr); 1988 ierr = MatSetUp(St);CHKERRQ(ierr); 1989 ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr); 1990 if (!mumps->sym) { /* MUMPS always returns a full matrix */ 1991 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1992 PetscInt i,j,N=mumps->id.size_schur; 1993 for (i=0;i<N;i++) { 1994 for (j=0;j<N;j++) { 1995 #if !defined(PETSC_USE_COMPLEX) 1996 PetscScalar val = mumps->id.schur[i*N+j]; 1997 #else 1998 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 1999 #endif 2000 array[j*N+i] = val; 2001 } 2002 } 2003 } else { /* stored by columns */ 2004 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 2005 } 2006 } else { /* either full or lower-triangular (not packed) */ 2007 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2008 PetscInt i,j,N=mumps->id.size_schur; 2009 for (i=0;i<N;i++) { 2010 for (j=i;j<N;j++) { 2011 #if !defined(PETSC_USE_COMPLEX) 2012 PetscScalar val = mumps->id.schur[i*N+j]; 2013 #else 2014 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2015 #endif 2016 array[i*N+j] = val; 2017 array[j*N+i] = val; 2018 } 2019 } 2020 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2021 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 2022 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2023 PetscInt i,j,N=mumps->id.size_schur; 2024 for (i=0;i<N;i++) { 2025 for (j=0;j<i+1;j++) { 2026 #if !defined(PETSC_USE_COMPLEX) 2027 PetscScalar val = mumps->id.schur[i*N+j]; 2028 #else 2029 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2030 #endif 2031 array[i*N+j] = val; 2032 array[j*N+i] = val; 2033 } 2034 } 2035 } 2036 } 2037 ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr); 2038 *S = St; 2039 PetscFunctionReturn(0); 2040 } 2041 2042 /* -------------------------------------------------------------------------------------------*/ 2043 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival) 2044 { 2045 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2046 2047 PetscFunctionBegin; 2048 mumps->id.ICNTL(icntl) = ival; 2049 PetscFunctionReturn(0); 2050 } 2051 2052 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival) 2053 { 2054 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2055 2056 PetscFunctionBegin; 2057 *ival = mumps->id.ICNTL(icntl); 2058 PetscFunctionReturn(0); 2059 } 2060 2061 /*@ 2062 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2063 2064 Logically Collective on Mat 2065 2066 Input Parameters: 2067 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2068 . icntl - index of MUMPS parameter array ICNTL() 2069 - ival - value of MUMPS ICNTL(icntl) 2070 2071 Options Database: 2072 . -mat_mumps_icntl_<icntl> <ival> 2073 2074 Level: beginner 2075 2076 References: 2077 .
MUMPS Users' Guide 2078 2079 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2080 @*/ 2081 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival) 2082 { 2083 PetscErrorCode ierr; 2084 2085 PetscFunctionBegin; 2086 PetscValidType(F,1); 2087 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2088 PetscValidLogicalCollectiveInt(F,icntl,2); 2089 PetscValidLogicalCollectiveInt(F,ival,3); 2090 ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr); 2091 PetscFunctionReturn(0); 2092 } 2093 2094 /*@ 2095 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2096 2097 Logically Collective on Mat 2098 2099 Input Parameters: 2100 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2101 - icntl - index of MUMPS parameter array ICNTL() 2102 2103 Output Parameter: 2104 . ival - value of MUMPS ICNTL(icntl) 2105 2106 Level: beginner 2107 2108 References: 2109 . MUMPS Users' Guide 2110 2111 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2112 @*/ 2113 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival) 2114 { 2115 PetscErrorCode ierr; 2116 2117 PetscFunctionBegin; 2118 PetscValidType(F,1); 2119 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2120 PetscValidLogicalCollectiveInt(F,icntl,2); 2121 PetscValidIntPointer(ival,3); 2122 ierr = PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2123 PetscFunctionReturn(0); 2124 } 2125 2126 /* -------------------------------------------------------------------------------------------*/ 2127 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val) 2128 { 2129 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2130 2131 PetscFunctionBegin; 2132 mumps->id.CNTL(icntl) = val; 2133 PetscFunctionReturn(0); 2134 } 2135 2136 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val) 2137 { 2138 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2139 2140 PetscFunctionBegin; 2141 *val = mumps->id.CNTL(icntl); 2142 PetscFunctionReturn(0); 2143 } 2144 2145 /*@ 2146 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2147 2148 Logically Collective on Mat 2149 2150 Input Parameters: 2151 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2152 . icntl - index of MUMPS parameter array CNTL() 2153 - val - value of MUMPS CNTL(icntl) 2154 2155 Options Database: 2156 . -mat_mumps_cntl_<icntl> <val> 2157 2158 Level: beginner 2159 2160 References: 2161 . 
MUMPS Users' Guide 2162 2163 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2164 @*/ 2165 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val) 2166 { 2167 PetscErrorCode ierr; 2168 2169 PetscFunctionBegin; 2170 PetscValidType(F,1); 2171 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2172 PetscValidLogicalCollectiveInt(F,icntl,2); 2173 PetscValidLogicalCollectiveReal(F,val,3); 2174 ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr); 2175 PetscFunctionReturn(0); 2176 } 2177 2178 /*@ 2179 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2180 2181 Logically Collective on Mat 2182 2183 Input Parameters: 2184 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2185 - icntl - index of MUMPS parameter array CNTL() 2186 2187 Output Parameter: 2188 . val - value of MUMPS CNTL(icntl) 2189 2190 Level: beginner 2191 2192 References: 2193 . MUMPS Users' Guide 2194 2195 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2196 @*/ 2197 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val) 2198 { 2199 PetscErrorCode ierr; 2200 2201 PetscFunctionBegin; 2202 PetscValidType(F,1); 2203 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2204 PetscValidLogicalCollectiveInt(F,icntl,2); 2205 PetscValidRealPointer(val,3); 2206 ierr = PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2207 PetscFunctionReturn(0); 2208 } 2209 2210 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info) 2211 { 2212 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2213 2214 PetscFunctionBegin; 2215 *info = mumps->id.INFO(icntl); 2216 PetscFunctionReturn(0); 2217 } 2218 2219 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog) 2220 { 2221 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2222 2223 PetscFunctionBegin; 2224 *infog = mumps->id.INFOG(icntl); 2225 PetscFunctionReturn(0); 2226 } 2227 2228 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo) 2229 { 2230 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2231 2232 PetscFunctionBegin; 2233 *rinfo = mumps->id.RINFO(icntl); 2234 PetscFunctionReturn(0); 2235 } 2236 2237 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog) 2238 { 2239 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2240 2241 PetscFunctionBegin; 2242 *rinfog = mumps->id.RINFOG(icntl); 2243 PetscFunctionReturn(0); 2244 } 2245 2246 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS) 2247 { 2248 PetscErrorCode ierr; 2249 Mat Bt = NULL,Btseq = NULL; 2250 PetscBool flg; 2251 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2252 PetscScalar *aa; 2253 PetscInt spnr,*ia,*ja; 2254 2255 PetscFunctionBegin; 2256 PetscValidIntPointer(spRHS,2); 2257 ierr = PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);CHKERRQ(ierr); 2258 if (flg) { 2259 ierr = MatTransposeGetMat(spRHS,&Bt);CHKERRQ(ierr); 2260 } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be type MATTRANSPOSEMAT matrix"); 2261 2262 ierr = MatMumpsSetIcntl(F,30,1);CHKERRQ(ierr); 2263 2264 if (mumps->petsc_size > 1) { 2265 Mat_MPIAIJ *b = 
(Mat_MPIAIJ*)Bt->data; 2266 Btseq = b->A; 2267 } else { 2268 Btseq = Bt; 2269 } 2270 2271 if (!mumps->myid) { 2272 ierr = MatSeqAIJGetArray(Btseq,&aa);CHKERRQ(ierr); 2273 ierr = MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2274 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2275 2276 mumps->id.irhs_ptr = ia; 2277 mumps->id.irhs_sparse = ja; 2278 mumps->id.nz_rhs = ia[spnr] - 1; 2279 mumps->id.rhs_sparse = (MumpsScalar*)aa; 2280 } else { 2281 mumps->id.irhs_ptr = NULL; 2282 mumps->id.irhs_sparse = NULL; 2283 mumps->id.nz_rhs = 0; 2284 mumps->id.rhs_sparse = NULL; 2285 } 2286 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2287 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2288 2289 /* solve phase */ 2290 /*-------------*/ 2291 mumps->id.job = JOB_SOLVE; 2292 PetscMUMPS_c(mumps); 2293 if (mumps->id.INFOG(1) < 0) 2294 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 2295 2296 if (!mumps->myid) { 2297 ierr = MatSeqAIJRestoreArray(Btseq,&aa);CHKERRQ(ierr); 2298 ierr = MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2299 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2300 } 2301 PetscFunctionReturn(0); 2302 } 2303 2304 /*@ 2305 MatMumpsGetInverse - Get user-specified set of entries in inverse of A 2306 2307 Logically Collective on Mat 2308 2309 Input Parameters: 2310 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2311 - spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0] 2312 2313 Output Parameter: 2314 . spRHS - requested entries of inverse of A 2315 2316 Level: beginner 2317 2318 References: 2319 . MUMPS Users' Guide 2320 2321 .seealso: MatGetFactor(), MatCreateTranspose() 2322 @*/ 2323 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS) 2324 { 2325 PetscErrorCode ierr; 2326 2327 PetscFunctionBegin; 2328 PetscValidType(F,1); 2329 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2330 ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr); 2331 PetscFunctionReturn(0); 2332 } 2333 2334 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST) 2335 { 2336 PetscErrorCode ierr; 2337 Mat spRHS; 2338 2339 PetscFunctionBegin; 2340 ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr); 2341 ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr); 2342 ierr = MatDestroy(&spRHS);CHKERRQ(ierr); 2343 PetscFunctionReturn(0); 2344 } 2345 2346 /*@ 2347 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T 2348 2349 Logically Collective on Mat 2350 2351 Input Parameters: 2352 + F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface 2353 - spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0] 2354 2355 Output Parameter: 2356 . spRHST - requested entries of inverse of A^T 2357 2358 Level: beginner 2359 2360 References: 2361 . 
MUMPS Users' Guide 2362 2363 .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse() 2364 @*/ 2365 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST) 2366 { 2367 PetscErrorCode ierr; 2368 PetscBool flg; 2369 2370 PetscFunctionBegin; 2371 PetscValidType(F,1); 2372 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2373 ierr = PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 2374 if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix"); 2375 2376 ierr = PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));CHKERRQ(ierr); 2377 PetscFunctionReturn(0); 2378 } 2379 2380 /*@ 2381 MatMumpsGetInfo - Get MUMPS parameter INFO() 2382 2383 Logically Collective on Mat 2384 2385 Input Parameters: 2386 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2387 - icntl - index of MUMPS parameter array INFO() 2388 2389 Output Parameter: 2390 . ival - value of MUMPS INFO(icntl) 2391 2392 Level: beginner 2393 2394 References: 2395 . MUMPS Users' Guide 2396 2397 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2398 @*/ 2399 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival) 2400 { 2401 PetscErrorCode ierr; 2402 2403 PetscFunctionBegin; 2404 PetscValidType(F,1); 2405 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2406 PetscValidIntPointer(ival,3); 2407 ierr = PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2408 PetscFunctionReturn(0); 2409 } 2410 2411 /*@ 2412 MatMumpsGetInfog - Get MUMPS parameter INFOG() 2413 2414 Logically Collective on Mat 2415 2416 Input Parameters: 2417 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2418 - icntl - index of MUMPS parameter array INFOG() 2419 2420 Output Parameter: 2421 . ival - value of MUMPS INFOG(icntl) 2422 2423 Level: beginner 2424 2425 References: 2426 . MUMPS Users' Guide 2427 2428 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2429 @*/ 2430 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival) 2431 { 2432 PetscErrorCode ierr; 2433 2434 PetscFunctionBegin; 2435 PetscValidType(F,1); 2436 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2437 PetscValidIntPointer(ival,3); 2438 ierr = PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2439 PetscFunctionReturn(0); 2440 } 2441 2442 /*@ 2443 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 2444 2445 Logically Collective on Mat 2446 2447 Input Parameters: 2448 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2449 - icntl - index of MUMPS parameter array RINFO() 2450 2451 Output Parameter: 2452 . val - value of MUMPS RINFO(icntl) 2453 2454 Level: beginner 2455 2456 References: 2457 . 
MUMPS Users' Guide 2458 2459 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2460 @*/ 2461 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val) 2462 { 2463 PetscErrorCode ierr; 2464 2465 PetscFunctionBegin; 2466 PetscValidType(F,1); 2467 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2468 PetscValidRealPointer(val,3); 2469 ierr = PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2470 PetscFunctionReturn(0); 2471 } 2472 2473 /*@ 2474 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 2475 2476 Logically Collective on Mat 2477 2478 Input Parameters: 2479 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2480 - icntl - index of MUMPS parameter array RINFOG() 2481 2482 Output Parameter: 2483 . val - value of MUMPS RINFOG(icntl) 2484 2485 Level: beginner 2486 2487 References: 2488 . MUMPS Users' Guide 2489 2490 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2491 @*/ 2492 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val) 2493 { 2494 PetscErrorCode ierr; 2495 2496 PetscFunctionBegin; 2497 PetscValidType(F,1); 2498 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2499 PetscValidRealPointer(val,3); 2500 ierr = PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2501 PetscFunctionReturn(0); 2502 } 2503 2504 /*MC 2505 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 2506 distributed and sequential matrices via the external package MUMPS. 2507 2508 Works with MATAIJ and MATSBAIJ matrices 2509 2510 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 2511 2512 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below. 2513 2514 Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver 2515 2516 Options Database Keys: 2517 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 2518 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warnings 2519 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 2520 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 2521 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7) 2522 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis 2523 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 2524 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 2525 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 2526 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 2527 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 2528 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 2529 .
-mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 2530 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 2531 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can be allocated per processor 2532 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 2533 . -mat_mumps_icntl_25 - ICNTL(25): computes a solution of a deficient matrix and a null space basis 2534 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix was computed 2535 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering 2536 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 2537 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 2538 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 2539 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 2540 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 2541 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 2542 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 2543 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 2544 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 2545 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 2546 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 2547 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 2548 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 2549 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 2550 The default is typically the number of cores per CPU package (socket), as reported by hwloc and suggested by the MUMPS manual. 2551 2552 Level: beginner 2553 2554 Notes: 2555 MUMPS Cholesky does not handle (complex) Hermitian matrices (see http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf), so using it on a Hermitian matrix will generate an error. 2556 2557 When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling 2558 $ KSPGetPC(ksp,&pc); 2559 $ PCFactorGetMatrix(pc,&mat); 2560 $ MatMumpsGetInfo(mat,....); 2561 $ MatMumpsGetInfog(mat,....); etc. 2562 Alternatively, run with -ksp_error_if_not_converged and the program will stop and print the information in the error message. 2563 2564 There are two modes to run MUMPS/PETSc with OpenMP: 2565 2566 $ Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 2567 $ threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". 2568 2569 $ -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example, 2570 $ if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 2571 2572 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 2573 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc 2574 (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available).
   Since MUMPS calls BLAS libraries, to get good performance you should have multithreaded BLAS libraries, such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
   (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).

   If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain the MPI
   processes on each compute node. Listing the processes in ascending rank order, we split the processes on a node into consecutive groups of
   size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and the others in the omp_comm
   are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. We free the CPUs assigned to slaves (possibly set
   by CPU binding policies in job scripts) and make those CPUs available to the master, so that the OpenMP threads spawned by MUMPS can run on them.
   On a multi-socket compute node, MPI rank mapping matters. Continuing the example above, suppose a compute node has two sockets.
   If you interleave MPI ranks across the two sockets (even ranks on socket 0, odd ranks on socket 1) and bind MPI ranks to cores,
   then with -mat_mumps_use_omp_threads 16 a master rank (and the threads it spawns) uses half of the cores in socket 0 and half of the
   cores in socket 1, which hurts locality. If instead you map MPI ranks consecutively onto the two sockets, the problem does not arise.
   Therefore, when you use -mat_mumps_use_omp_threads, keep an eye on your MPI rank mapping and CPU binding.
   For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and to
   examine the resulting mapping.

   PETSc does not control thread binding in MUMPS, so to get the best performance one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts,
   for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts, as PETSc
   calls omp_set_num_threads(m) internally before calling MUMPS; a sample job-script fragment combining these settings is sketched below.
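   For illustration only, a Slurm job-script fragment combining the settings above might read as follows; the rank count, thread count, and executable name are placeholders, not recommendations.

$     export OMP_PROC_BIND=spread
$     export OMP_PLACES=threads
$     srun --cpu-bind=verbose -m block:block -n 64 ./test -mat_mumps_use_omp_threads 16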
   References:
+   1. - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
-   2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.

.seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCGetFactor(), PCFactorGetMatrix()

M*/

static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERMUMPS;
  PetscFunctionReturn(0);
}

/* MatGetFactor for Seq and MPI AIJ matrices */
static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqAIJ;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
    mumps->sym = 0;
  } else {
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
    B->factortype                  = MAT_FACTOR_CHOLESKY;
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
#if defined(PETSC_USE_COMPLEX)
    mumps->sym = 2;
#else
    if (A->spd_set && A->spd) mumps->sym = 1;
    else mumps->sym = 2;
#endif
  }

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}

/* MatGetFactor for Seq and MPI SBAIJ matrices */
static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqSBAIJ;

  PetscFunctionBegin;
  if (ftype != MAT_FACTOR_CHOLESKY) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Cannot use PETSc SBAIJ matrices with MUMPS LU, use AIJ matrix");
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr);
  /* Create the factorization matrix */
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
  if (isSeqSBAIJ) {
    mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
  } else {
    mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
  }

  B->ops->getinfo                = MatGetInfo_External;
  B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
  B->ops->view                   = MatView_MUMPS;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  B->factortype = MAT_FACTOR_CHOLESKY;
#if defined(PETSC_USE_COMPLEX)
  mumps->sym = 2;
#else
  if (A->spd_set && A->spd) mumps->sym = 1;
  else mumps->sym = 2;
#endif

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}

/* MatGetFactor for Seq and MPI BAIJ matrices; only LU is supported */
static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqBAIJ;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
    mumps->sym = 0;
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");

  B->ops->getinfo = MatGetInfo_External;
  B->ops->view    = MatView_MUMPS;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}

/* MatGetFactor for Seq and MPI SELL matrices */
static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqSELL;
  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
    else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
    mumps->sym = 0;
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}

PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
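
/*
   Usage sketch (illustration only, not part of this file's API): factoring an assembled
   AIJ matrix A with MUMPS through the factor interface registered above, then solving
   with vectors b and x, which are assumed to exist. The ICNTL/INFOG indices follow the
   options table above and the MUMPS Users' Guide (INFOG(1) < 0 indicates an error).

     Mat            F;
     MatFactorInfo  info;
     PetscInt       ival;

     ierr = MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);CHKERRQ(ierr);
     ierr = MatMumpsSetIcntl(F,7,5);CHKERRQ(ierr);        (ICNTL(7)=5: Metis ordering)
     ierr = MatFactorInfoInitialize(&info);CHKERRQ(ierr);
     ierr = MatLUFactorSymbolic(F,A,NULL,NULL,&info);CHKERRQ(ierr);
     ierr = MatLUFactorNumeric(F,A,&info);CHKERRQ(ierr);
     ierr = MatMumpsGetInfog(F,1,&ival);CHKERRQ(ierr);    (check the MUMPS status)
     ierr = MatSolve(F,b,x);CHKERRQ(ierr);
     ierr = MatDestroy(&F);CHKERRQ(ierr);
*/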