1 2 /* 3 Provides an interface to the MUMPS sparse solver 4 */ 5 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 8 #include <../src/mat/impls/sell/mpi/mpisell.h> 9 10 EXTERN_C_BEGIN 11 #if defined(PETSC_USE_COMPLEX) 12 #if defined(PETSC_USE_REAL_SINGLE) 13 #include <cmumps_c.h> 14 #else 15 #include <zmumps_c.h> 16 #endif 17 #else 18 #if defined(PETSC_USE_REAL_SINGLE) 19 #include <smumps_c.h> 20 #else 21 #include <dmumps_c.h> 22 #endif 23 #endif 24 EXTERN_C_END 25 #define JOB_INIT -1 26 #define JOB_FACTSYMBOLIC 1 27 #define JOB_FACTNUMERIC 2 28 #define JOB_SOLVE 3 29 #define JOB_END -2 30 31 /* calls to MUMPS */ 32 #if defined(PETSC_USE_COMPLEX) 33 #if defined(PETSC_USE_REAL_SINGLE) 34 #define MUMPS_c cmumps_c 35 #else 36 #define MUMPS_c zmumps_c 37 #endif 38 #else 39 #if defined(PETSC_USE_REAL_SINGLE) 40 #define MUMPS_c smumps_c 41 #else 42 #define MUMPS_c dmumps_c 43 #endif 44 #endif 45 46 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */ 47 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 48 #define PetscMUMPS_c(mumps) \ 49 do { \ 50 if (mumps->use_petsc_omp_support) { \ 51 if (mumps->is_omp_master) { \ 52 ierr = PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl);CHKERRQ(ierr); \ 53 MUMPS_c(&mumps->id); \ 54 ierr = PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl);CHKERRQ(ierr); \ 55 } \ 56 ierr = PetscOmpCtrlBarrier(mumps->omp_ctrl);CHKERRQ(ierr); \ 57 } else { \ 58 MUMPS_c(&mumps->id); \ 59 } \ 60 } while(0) 61 #else 62 #define PetscMUMPS_c(mumps) \ 63 do { MUMPS_c(&mumps->id); } while (0) 64 #endif 65 66 /* declare MumpsScalar */ 67 #if defined(PETSC_USE_COMPLEX) 68 #if defined(PETSC_USE_REAL_SINGLE) 69 #define MumpsScalar mumps_complex 70 #else 71 #define MumpsScalar mumps_double_complex 72 #endif 73 #else 74 #define MumpsScalar PetscScalar 75 #endif 76 77 /* macros s.t. indices match MUMPS documentation */ 78 #define ICNTL(I) icntl[(I)-1] 79 #define CNTL(I) cntl[(I)-1] 80 #define INFOG(I) infog[(I)-1] 81 #define INFO(I) info[(I)-1] 82 #define RINFOG(I) rinfog[(I)-1] 83 #define RINFO(I) rinfo[(I)-1] 84 85 typedef struct { 86 #if defined(PETSC_USE_COMPLEX) 87 #if defined(PETSC_USE_REAL_SINGLE) 88 CMUMPS_STRUC_C id; 89 #else 90 ZMUMPS_STRUC_C id; 91 #endif 92 #else 93 #if defined(PETSC_USE_REAL_SINGLE) 94 SMUMPS_STRUC_C id; 95 #else 96 DMUMPS_STRUC_C id; 97 #endif 98 #endif 99 100 MatStructure matstruc; 101 PetscMPIInt myid,petsc_size; 102 PetscInt *irn,*jcn,nz,sym; 103 PetscScalar *val; 104 MPI_Comm mumps_comm; 105 PetscInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 106 VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 107 Vec b_seq,x_seq; 108 PetscInt ninfo,*info; /* display INFO */ 109 PetscInt sizeredrhs; 110 PetscScalar *schur_sol; 111 PetscInt schur_sizesol; 112 113 PetscBool use_petsc_omp_support; 114 PetscOmpCtrl omp_ctrl; /* an OpenMP controler that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 115 MPI_Comm petsc_comm,omp_comm; /* petsc_comm is petsc matrix's comm */ 116 PetscMPIInt mpinz; /* on master rank, nz = sum(mpinz) over omp_comm; on other ranks, mpinz = nz*/ 117 PetscMPIInt omp_comm_size; 118 PetscBool is_omp_master; /* is this rank the master of omp_comm */ 119 PetscMPIInt *recvcount,*displs; 120 121 PetscErrorCode (*ConvertToTriples)(Mat, int, MatReuse, int*, int**, int**, PetscScalar**); 122 } Mat_MUMPS; 123 124 extern PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*); 125 126 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps) 127 { 128 PetscErrorCode ierr; 129 130 PetscFunctionBegin; 131 ierr = PetscFree2(mumps->id.listvar_schur,mumps->id.schur);CHKERRQ(ierr); 132 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 133 ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr); 134 mumps->id.size_schur = 0; 135 mumps->id.schur_lld = 0; 136 mumps->id.ICNTL(19) = 0; 137 PetscFunctionReturn(0); 138 } 139 140 /* solve with rhs in mumps->id.redrhs and return in the same location */ 141 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 142 { 143 Mat_MUMPS *mumps=(Mat_MUMPS*)F->data; 144 Mat S,B,X; 145 MatFactorSchurStatus schurstatus; 146 PetscInt sizesol; 147 PetscErrorCode ierr; 148 149 PetscFunctionBegin; 150 ierr = MatFactorFactorizeSchurComplement(F);CHKERRQ(ierr); 151 ierr = MatFactorGetSchurComplement(F,&S,&schurstatus);CHKERRQ(ierr); 152 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);CHKERRQ(ierr); 153 ierr = MatSetType(B,((PetscObject)S)->type_name);CHKERRQ(ierr); 154 switch (schurstatus) { 155 case MAT_FACTOR_SCHUR_FACTORED: 156 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);CHKERRQ(ierr); 157 ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr); 158 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 159 ierr = MatMatSolveTranspose(S,B,X);CHKERRQ(ierr); 160 } else { 161 ierr = MatMatSolve(S,B,X);CHKERRQ(ierr); 162 } 163 break; 164 case MAT_FACTOR_SCHUR_INVERTED: 165 sizesol = mumps->id.nrhs*mumps->id.size_schur; 166 if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 167 ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr); 168 ierr = PetscMalloc1(sizesol,&mumps->schur_sol);CHKERRQ(ierr); 169 mumps->schur_sizesol = sizesol; 170 } 171 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);CHKERRQ(ierr); 172 ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr); 173 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 174 ierr = MatTransposeMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr); 175 } else { 176 ierr = MatMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr); 177 } 178 ierr = MatCopy(X,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 179 break; 180 default: 181 SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %D",F->schur_status); 182 break; 183 } 184 ierr = MatFactorRestoreSchurComplement(F,&S,schurstatus);CHKERRQ(ierr); 185 ierr = MatDestroy(&B);CHKERRQ(ierr); 186 ierr = MatDestroy(&X);CHKERRQ(ierr); 187 PetscFunctionReturn(0); 188 } 189 190 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 191 { 192 Mat_MUMPS *mumps=(Mat_MUMPS*)F->data; 193 PetscErrorCode ierr; 194 195 PetscFunctionBegin; 196 if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 197 PetscFunctionReturn(0); 198 } 199 if (!expansion) { /* prepare for the condensation step */ 200 PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur; 201 /* allocate MUMPS internal array to store reduced right-hand sides */ 202 if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 203 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 204 mumps->id.lredrhs = mumps->id.size_schur; 205 ierr = PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);CHKERRQ(ierr); 206 mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs; 207 } 208 mumps->id.ICNTL(26) = 1; /* condensation phase */ 209 } else { /* prepare for the expansion step */ 210 /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */ 211 ierr = MatMumpsSolveSchur_Private(F);CHKERRQ(ierr); 212 mumps->id.ICNTL(26) = 2; /* expansion phase */ 213 PetscMUMPS_c(mumps); 214 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 215 /* restore defaults */ 216 mumps->id.ICNTL(26) = -1; 217 /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 218 if (mumps->id.nrhs > 1) { 219 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 220 mumps->id.lredrhs = 0; 221 mumps->sizeredrhs = 0; 222 } 223 } 224 PetscFunctionReturn(0); 225 } 226 227 /* 228 MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz] 229 230 input: 231 A - matrix in aij,baij or sbaij format 232 shift - 0: C style output triple; 1: Fortran style output triple. 233 reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 234 MAT_REUSE_MATRIX: only the values in v array are updated 235 output: 236 nnz - dim of r, c, and v (number of local nonzero entries of A) 237 r, c, v - row and col index, matrix values (matrix triples) 238 239 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 240 freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 241 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 242 243 */ 244 245 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 246 { 247 const PetscInt *ai,*aj,*ajj,M=A->rmap->n; 248 PetscInt nz,rnz,i,j; 249 PetscErrorCode ierr; 250 PetscInt *row,*col; 251 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 252 253 PetscFunctionBegin; 254 *v=aa->a; 255 if (reuse == MAT_INITIAL_MATRIX) { 256 nz = aa->nz; 257 ai = aa->i; 258 aj = aa->j; 259 *nnz = nz; 260 ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr); 261 col = row + nz; 262 263 nz = 0; 264 for (i=0; i<M; i++) { 265 rnz = ai[i+1] - ai[i]; 266 ajj = aj + ai[i]; 267 for (j=0; j<rnz; j++) { 268 row[nz] = i+shift; col[nz++] = ajj[j] + shift; 269 } 270 } 271 *r = row; *c = col; 272 } 273 PetscFunctionReturn(0); 274 } 275 276 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 277 { 278 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 279 PetscInt *ptr; 280 281 PetscFunctionBegin; 282 *v = a->val; 283 if (reuse == MAT_INITIAL_MATRIX) { 284 PetscInt nz,i,j,row; 285 PetscErrorCode ierr; 286 287 nz = a->sliidx[a->totalslices]; 288 *nnz = nz; 289 ierr = PetscMalloc1(2*nz, &ptr);CHKERRQ(ierr); 290 *r = ptr; 291 *c = ptr + nz; 292 293 for (i=0; i<a->totalslices; i++) { 294 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 295 *ptr++ = 8*i + row + shift; 296 } 297 } 298 for (i=0;i<nz;i++) *ptr++ = a->colidx[i] + shift; 299 } 300 PetscFunctionReturn(0); 301 } 302 303 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 304 { 305 Mat_SeqBAIJ *aa=(Mat_SeqBAIJ*)A->data; 306 const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2; 307 PetscInt bs,M,nz,idx=0,rnz,i,j,k,m; 308 PetscErrorCode ierr; 309 PetscInt *row,*col; 310 311 PetscFunctionBegin; 312 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 313 M = A->rmap->N/bs; 314 *v = aa->a; 315 if (reuse == MAT_INITIAL_MATRIX) { 316 ai = aa->i; aj = aa->j; 317 nz = bs2*aa->nz; 318 *nnz = nz; 319 ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr); 320 col = row + nz; 321 322 for (i=0; i<M; i++) { 323 ajj = aj + ai[i]; 324 rnz = ai[i+1] - ai[i]; 325 for (k=0; k<rnz; k++) { 326 for (j=0; j<bs; j++) { 327 for (m=0; m<bs; m++) { 328 row[idx] = i*bs + m + shift; 329 col[idx++] = bs*(ajj[k]) + j + shift; 330 } 331 } 332 } 333 } 334 *r = row; *c = col; 335 } 336 PetscFunctionReturn(0); 337 } 338 339 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r,int **c,PetscScalar **v) 340 { 341 const PetscInt *ai, *aj,*ajj; 342 PetscInt nz,rnz,i,j,k,m,bs; 343 PetscErrorCode ierr; 344 PetscInt *row,*col; 345 PetscScalar *val; 346 Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)A->data; 347 const PetscInt bs2=aa->bs2,mbs=aa->mbs; 348 #if defined(PETSC_USE_COMPLEX) 349 PetscBool hermitian; 350 #endif 351 352 PetscFunctionBegin; 353 #if defined(PETSC_USE_COMPLEX) 354 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 355 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 356 #endif 357 ai = aa->i; 358 aj = aa->j; 359 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 360 if (reuse == MAT_INITIAL_MATRIX) { 361 nz = aa->nz; 362 ierr = PetscMalloc((2*bs2*nz*sizeof(PetscInt)+(bs>1?bs2*nz*sizeof(PetscScalar):0)), &row);CHKERRQ(ierr); 363 col = row + bs2*nz; 364 if (bs>1) val = (PetscScalar*)(col + bs2*nz); 365 else val = aa->a; 366 367 *r = row; *c = col; *v = val; 368 } else { 369 if (bs == 1) *v = aa->a; 370 row = *r; col = *c; val = *v; 371 } 372 373 nz = 0; 374 if (bs>1) { 375 for (i=0; i<mbs; i++) { 376 rnz = ai[i+1] - ai[i]; 377 ajj = aj + ai[i]; 378 for (j=0; j<rnz; j++) { 379 for (k=0; k<bs; k++) { 380 for (m=0; m<bs; m++) { 381 if (ajj[j]>i || k>=m) { 382 if (reuse == MAT_INITIAL_MATRIX) { 383 row[nz] = i*bs + m + shift; 384 col[nz] = ajj[j]*bs + k + shift; 385 } 386 val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs]; 387 } 388 } 389 } 390 } 391 } 392 } else if (reuse == MAT_INITIAL_MATRIX) { 393 for (i=0; i<mbs; i++) { 394 rnz = ai[i+1] - ai[i]; 395 ajj = aj + ai[i]; 396 for (j=0; j<rnz; j++) { 397 row[nz] = i+shift; col[nz++] = ajj[j] + shift; 398 } 399 } 400 if (nz != aa->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %D != %D",nz,aa->nz); 401 } 402 if (reuse == MAT_INITIAL_MATRIX) *nnz = nz; 403 PetscFunctionReturn(0); 404 } 405 406 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 407 { 408 const PetscInt *ai,*aj,*ajj,*adiag,M=A->rmap->n; 409 PetscInt nz,rnz,i,j; 410 const PetscScalar *av,*v1; 411 PetscScalar *val; 412 PetscErrorCode ierr; 413 PetscInt *row,*col; 414 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 415 PetscBool missing; 416 #if defined(PETSC_USE_COMPLEX) 417 PetscBool hermitian; 418 #endif 419 420 PetscFunctionBegin; 421 #if defined(PETSC_USE_COMPLEX) 422 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 423 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 424 #endif 425 ai = aa->i; aj = aa->j; av = aa->a; 426 adiag = aa->diag; 427 ierr = MatMissingDiagonal_SeqAIJ(A,&missing,&i);CHKERRQ(ierr); 428 if (reuse == MAT_INITIAL_MATRIX) { 429 /* count nz in the upper triangular part of A */ 430 nz = 0; 431 if (missing) { 432 for (i=0; i<M; i++) { 433 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 434 for (j=ai[i];j<ai[i+1];j++) { 435 if (aj[j] < i) continue; 436 nz++; 437 } 438 } else { 439 nz += ai[i+1] - adiag[i]; 440 } 441 } 442 } else { 443 for (i=0; i<M; i++) nz += ai[i+1] - adiag[i]; 444 } 445 *nnz = nz; 446 447 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 448 col = row + nz; 449 val = (PetscScalar*)(col + nz); 450 451 nz = 0; 452 if (missing) { 453 for (i=0; i<M; i++) { 454 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 455 for (j=ai[i];j<ai[i+1];j++) { 456 if (aj[j] < i) continue; 457 row[nz] = i+shift; 458 col[nz] = aj[j]+shift; 459 val[nz] = av[j]; 460 nz++; 461 } 462 } else { 463 rnz = ai[i+1] - adiag[i]; 464 ajj = aj + adiag[i]; 465 v1 = av + adiag[i]; 466 for (j=0; j<rnz; j++) { 467 row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j]; 468 } 469 } 470 } 471 } else { 472 for (i=0; i<M; i++) { 473 rnz = ai[i+1] - adiag[i]; 474 ajj = aj + adiag[i]; 475 v1 = av + adiag[i]; 476 for (j=0; j<rnz; j++) { 477 row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j]; 478 } 479 } 480 } 481 *r = row; *c = col; *v = val; 482 } else { 483 nz = 0; val = *v; 484 if (missing) { 485 for (i=0; i <M; i++) { 486 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 487 for (j=ai[i];j<ai[i+1];j++) { 488 if (aj[j] < i) continue; 489 val[nz++] = av[j]; 490 } 491 } else { 492 rnz = ai[i+1] - adiag[i]; 493 v1 = av + adiag[i]; 494 for (j=0; j<rnz; j++) { 495 val[nz++] = v1[j]; 496 } 497 } 498 } 499 } else { 500 for (i=0; i <M; i++) { 501 rnz = ai[i+1] - adiag[i]; 502 v1 = av + adiag[i]; 503 for (j=0; j<rnz; j++) { 504 val[nz++] = v1[j]; 505 } 506 } 507 } 508 } 509 PetscFunctionReturn(0); 510 } 511 512 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r,int **c, PetscScalar **v) 513 { 514 const PetscInt *ai,*aj,*bi,*bj,*garray,*ajj,*bjj; 515 PetscErrorCode ierr; 516 PetscInt rstart,nz,bs,i,j,k,m,jj,irow,countA,countB; 517 PetscInt *row,*col; 518 const PetscScalar *av,*bv,*v1,*v2; 519 PetscScalar *val; 520 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data; 521 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)(mat->A)->data; 522 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 523 const PetscInt bs2=aa->bs2,mbs=aa->mbs; 524 #if defined(PETSC_USE_COMPLEX) 525 PetscBool hermitian; 526 #endif 527 528 PetscFunctionBegin; 529 #if defined(PETSC_USE_COMPLEX) 530 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 531 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 532 #endif 533 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 534 rstart = A->rmap->rstart; 535 ai = aa->i; 536 aj = aa->j; 537 bi = bb->i; 538 bj = bb->j; 539 av = aa->a; 540 bv = bb->a; 541 542 garray = mat->garray; 543 544 if (reuse == MAT_INITIAL_MATRIX) { 545 nz = aa->nz + bb->nz; 546 ierr = PetscMalloc((2*bs2*nz*sizeof(PetscInt)+bs2*nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 547 col = row + bs2*nz; 548 val = (PetscScalar*)(col + bs2*nz); 549 550 *r = row; *c = col; *v = val; 551 } else { 552 row = *r; col = *c; val = *v; 553 } 554 555 jj = 0; irow = rstart; 556 for (i=0; i<mbs; i++) { 557 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 558 countA = ai[i+1] - ai[i]; 559 countB = bi[i+1] - bi[i]; 560 bjj = bj + bi[i]; 561 v1 = av + ai[i]*bs2; 562 v2 = bv + bi[i]*bs2; 563 564 if (bs>1) { 565 /* A-part */ 566 for (j=0; j<countA; j++) { 567 for (k=0; k<bs; k++) { 568 for (m=0; m<bs; m++) { 569 if (rstart + ajj[j]*bs>irow || k>=m) { 570 if (reuse == MAT_INITIAL_MATRIX) { 571 row[jj] = irow + m + shift; col[jj] = rstart + ajj[j]*bs + k + shift; 572 } 573 val[jj++] = v1[j*bs2 + m + k*bs]; 574 } 575 } 576 } 577 } 578 579 /* B-part */ 580 for (j=0; j < countB; j++) { 581 for (k=0; k<bs; k++) { 582 for (m=0; m<bs; m++) { 583 if (reuse == MAT_INITIAL_MATRIX) { 584 row[jj] = irow + m + shift; col[jj] = garray[bjj[j]]*bs + k + shift; 585 } 586 val[jj++] = v2[j*bs2 + m + k*bs]; 587 } 588 } 589 } 590 } else { 591 /* A-part */ 592 for (j=0; j<countA; j++) { 593 if (reuse == MAT_INITIAL_MATRIX) { 594 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 595 } 596 val[jj++] = v1[j]; 597 } 598 599 /* B-part */ 600 for (j=0; j < countB; j++) { 601 if (reuse == MAT_INITIAL_MATRIX) { 602 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 603 } 604 val[jj++] = v2[j]; 605 } 606 } 607 irow+=bs; 608 } 609 *nnz = jj; 610 PetscFunctionReturn(0); 611 } 612 613 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 614 { 615 const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 616 PetscErrorCode ierr; 617 PetscInt rstart,nz,i,j,jj,irow,countA,countB; 618 PetscInt *row,*col; 619 const PetscScalar *av, *bv,*v1,*v2; 620 PetscScalar *val; 621 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 622 Mat_SeqAIJ *aa = (Mat_SeqAIJ*)(mat->A)->data; 623 Mat_SeqAIJ *bb = (Mat_SeqAIJ*)(mat->B)->data; 624 625 PetscFunctionBegin; 626 rstart = A->rmap->rstart; 627 ai = aa->i; 628 aj = aa->j; 629 bi = bb->i; 630 bj = bb->j; 631 av = aa->a; 632 bv = bb->a; 633 634 garray = mat->garray; 635 636 if (reuse == MAT_INITIAL_MATRIX) { 637 nz = aa->nz + bb->nz; 638 *nnz = nz; 639 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 640 col = row + nz; 641 val = (PetscScalar*)(col + nz); 642 643 *r = row; *c = col; *v = val; 644 } else { 645 row = *r; col = *c; val = *v; 646 } 647 648 jj = 0; irow = rstart; 649 for (i=0; i<m; i++) { 650 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 651 countA = ai[i+1] - ai[i]; 652 countB = bi[i+1] - bi[i]; 653 bjj = bj + bi[i]; 654 v1 = av + ai[i]; 655 v2 = bv + bi[i]; 656 657 /* A-part */ 658 for (j=0; j<countA; j++) { 659 if (reuse == MAT_INITIAL_MATRIX) { 660 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 661 } 662 val[jj++] = v1[j]; 663 } 664 665 /* B-part */ 666 for (j=0; j < countB; j++) { 667 if (reuse == MAT_INITIAL_MATRIX) { 668 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 669 } 670 val[jj++] = v2[j]; 671 } 672 irow++; 673 } 674 PetscFunctionReturn(0); 675 } 676 677 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 678 { 679 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)A->data; 680 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ*)(mat->A)->data; 681 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 682 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj; 683 const PetscInt *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart; 684 const PetscInt bs2=mat->bs2; 685 PetscErrorCode ierr; 686 PetscInt bs,nz,i,j,k,n,jj,irow,countA,countB,idx; 687 PetscInt *row,*col; 688 const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2; 689 PetscScalar *val; 690 691 PetscFunctionBegin; 692 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 693 if (reuse == MAT_INITIAL_MATRIX) { 694 nz = bs2*(aa->nz + bb->nz); 695 *nnz = nz; 696 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 697 col = row + nz; 698 val = (PetscScalar*)(col + nz); 699 700 *r = row; *c = col; *v = val; 701 } else { 702 row = *r; col = *c; val = *v; 703 } 704 705 jj = 0; irow = rstart; 706 for (i=0; i<mbs; i++) { 707 countA = ai[i+1] - ai[i]; 708 countB = bi[i+1] - bi[i]; 709 ajj = aj + ai[i]; 710 bjj = bj + bi[i]; 711 v1 = av + bs2*ai[i]; 712 v2 = bv + bs2*bi[i]; 713 714 idx = 0; 715 /* A-part */ 716 for (k=0; k<countA; k++) { 717 for (j=0; j<bs; j++) { 718 for (n=0; n<bs; n++) { 719 if (reuse == MAT_INITIAL_MATRIX) { 720 row[jj] = irow + n + shift; 721 col[jj] = rstart + bs*ajj[k] + j + shift; 722 } 723 val[jj++] = v1[idx++]; 724 } 725 } 726 } 727 728 idx = 0; 729 /* B-part */ 730 for (k=0; k<countB; k++) { 731 for (j=0; j<bs; j++) { 732 for (n=0; n<bs; n++) { 733 if (reuse == MAT_INITIAL_MATRIX) { 734 row[jj] = irow + n + shift; 735 col[jj] = bs*garray[bjj[k]] + j + shift; 736 } 737 val[jj++] = v2[idx++]; 738 } 739 } 740 } 741 irow += bs; 742 } 743 PetscFunctionReturn(0); 744 } 745 746 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 747 { 748 const PetscInt *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 749 PetscErrorCode ierr; 750 PetscInt rstart,nz,nza,nzb,i,j,jj,irow,countA,countB; 751 PetscInt *row,*col; 752 const PetscScalar *av, *bv,*v1,*v2; 753 PetscScalar *val; 754 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 755 Mat_SeqAIJ *aa =(Mat_SeqAIJ*)(mat->A)->data; 756 Mat_SeqAIJ *bb =(Mat_SeqAIJ*)(mat->B)->data; 757 #if defined(PETSC_USE_COMPLEX) 758 PetscBool hermitian; 759 #endif 760 761 PetscFunctionBegin; 762 #if defined(PETSC_USE_COMPLEX) 763 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 764 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 765 #endif 766 ai = aa->i; 767 aj = aa->j; 768 adiag = aa->diag; 769 bi = bb->i; 770 bj = bb->j; 771 garray = mat->garray; 772 av = aa->a; 773 bv = bb->a; 774 775 rstart = A->rmap->rstart; 776 777 if (reuse == MAT_INITIAL_MATRIX) { 778 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 779 nzb = 0; /* num of upper triangular entries in mat->B */ 780 for (i=0; i<m; i++) { 781 nza += (ai[i+1] - adiag[i]); 782 countB = bi[i+1] - bi[i]; 783 bjj = bj + bi[i]; 784 for (j=0; j<countB; j++) { 785 if (garray[bjj[j]] > rstart) nzb++; 786 } 787 } 788 789 nz = nza + nzb; /* total nz of upper triangular part of mat */ 790 *nnz = nz; 791 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 792 col = row + nz; 793 val = (PetscScalar*)(col + nz); 794 795 *r = row; *c = col; *v = val; 796 } else { 797 row = *r; col = *c; val = *v; 798 } 799 800 jj = 0; irow = rstart; 801 for (i=0; i<m; i++) { 802 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 803 v1 = av + adiag[i]; 804 countA = ai[i+1] - adiag[i]; 805 countB = bi[i+1] - bi[i]; 806 bjj = bj + bi[i]; 807 v2 = bv + bi[i]; 808 809 /* A-part */ 810 for (j=0; j<countA; j++) { 811 if (reuse == MAT_INITIAL_MATRIX) { 812 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 813 } 814 val[jj++] = v1[j]; 815 } 816 817 /* B-part */ 818 for (j=0; j < countB; j++) { 819 if (garray[bjj[j]] > rstart) { 820 if (reuse == MAT_INITIAL_MATRIX) { 821 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 822 } 823 val[jj++] = v2[j]; 824 } 825 } 826 irow++; 827 } 828 PetscFunctionReturn(0); 829 } 830 831 PetscErrorCode MatDestroy_MUMPS(Mat A) 832 { 833 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 834 PetscErrorCode ierr; 835 836 PetscFunctionBegin; 837 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 838 ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr); 839 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 840 ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr); 841 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 842 ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr); 843 ierr = PetscFree(mumps->irn);CHKERRQ(ierr); 844 ierr = PetscFree(mumps->info);CHKERRQ(ierr); 845 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 846 mumps->id.job = JOB_END; 847 PetscMUMPS_c(mumps); 848 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 849 if (mumps->use_petsc_omp_support) { ierr = PetscOmpCtrlDestroy(&mumps->omp_ctrl);CHKERRQ(ierr); } 850 #endif 851 ierr = PetscFree2(mumps->recvcount,mumps->displs);CHKERRQ(ierr); 852 ierr = PetscFree(A->data);CHKERRQ(ierr); 853 854 /* clear composed functions */ 855 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr); 856 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr); 857 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr); 858 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr); 859 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);CHKERRQ(ierr); 860 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr); 861 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);CHKERRQ(ierr); 862 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);CHKERRQ(ierr); 863 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);CHKERRQ(ierr); 864 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);CHKERRQ(ierr); 865 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);CHKERRQ(ierr); 866 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);CHKERRQ(ierr); 867 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);CHKERRQ(ierr); 868 PetscFunctionReturn(0); 869 } 870 871 PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x) 872 { 873 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 874 PetscScalar *array; 875 Vec b_seq; 876 IS is_iden,is_petsc; 877 PetscErrorCode ierr; 878 PetscInt i; 879 PetscBool second_solve = PETSC_FALSE; 880 static PetscBool cite1 = PETSC_FALSE,cite2 = PETSC_FALSE; 881 882 PetscFunctionBegin; 883 ierr = PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1);CHKERRQ(ierr); 884 ierr = PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",&cite2);CHKERRQ(ierr); 885 886 if (A->factorerrortype) { 887 ierr = PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 888 ierr = VecSetInf(x);CHKERRQ(ierr); 889 PetscFunctionReturn(0); 890 } 891 892 mumps->id.ICNTL(20) = 0; /* dense RHS */ 893 mumps->id.nrhs = 1; 894 b_seq = mumps->b_seq; 895 if (mumps->petsc_size > 1) { 896 /* MUMPS only supports centralized rhs. Scatter b into a seqential rhs vector */ 897 ierr = VecScatterBegin(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 898 ierr = VecScatterEnd(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 899 if (!mumps->myid) {ierr = VecGetArray(b_seq,&array);CHKERRQ(ierr);} 900 } else { /* petsc_size == 1 */ 901 ierr = VecCopy(b,x);CHKERRQ(ierr); 902 ierr = VecGetArray(x,&array);CHKERRQ(ierr); 903 } 904 if (!mumps->myid) { /* define rhs on the host */ 905 mumps->id.nrhs = 1; 906 mumps->id.rhs = (MumpsScalar*)array; 907 } 908 909 /* 910 handle condensation step of Schur complement (if any) 911 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 912 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 913 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 914 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 915 */ 916 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 917 if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n"); 918 second_solve = PETSC_TRUE; 919 ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr); 920 } 921 /* solve phase */ 922 /*-------------*/ 923 mumps->id.job = JOB_SOLVE; 924 PetscMUMPS_c(mumps); 925 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 926 927 /* handle expansion step of Schur complement (if any) */ 928 if (second_solve) { 929 ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr); 930 } 931 932 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 933 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 934 /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 935 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 936 } 937 if (!mumps->scat_sol) { /* create scatter scat_sol */ 938 ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */ 939 for (i=0; i<mumps->id.lsol_loc; i++) { 940 mumps->id.isol_loc[i] -= 1; /* change Fortran style to C style */ 941 } 942 ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,mumps->id.isol_loc,PETSC_COPY_VALUES,&is_petsc);CHKERRQ(ierr); /* to */ 943 ierr = VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr); 944 ierr = ISDestroy(&is_iden);CHKERRQ(ierr); 945 ierr = ISDestroy(&is_petsc);CHKERRQ(ierr); 946 947 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 948 } 949 950 ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 951 ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 952 } 953 954 if (mumps->petsc_size > 1) {if (!mumps->myid) {ierr = VecRestoreArray(b_seq,&array);CHKERRQ(ierr);}} 955 else {ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);} 956 957 ierr = PetscLogFlops(2.0*mumps->id.RINFO(3));CHKERRQ(ierr); 958 PetscFunctionReturn(0); 959 } 960 961 PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x) 962 { 963 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 964 PetscErrorCode ierr; 965 966 PetscFunctionBegin; 967 mumps->id.ICNTL(9) = 0; 968 ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr); 969 mumps->id.ICNTL(9) = 1; 970 PetscFunctionReturn(0); 971 } 972 973 PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X) 974 { 975 PetscErrorCode ierr; 976 Mat Bt = NULL; 977 PetscBool flg, flgT; 978 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 979 PetscInt i,nrhs,M; 980 PetscScalar *array; 981 const PetscScalar *rbray; 982 PetscInt lsol_loc,nlsol_loc,*isol_loc,*idxx,*isol_loc_save,iidx = 0; 983 PetscScalar *bray,*sol_loc,*sol_loc_save; 984 IS is_to,is_from; 985 PetscInt k,proc,j,m,myrstart; 986 const PetscInt *rstart; 987 Vec v_mpi,b_seq,msol_loc; 988 VecScatter scat_rhs,scat_sol; 989 PetscScalar *aa; 990 PetscInt spnr,*ia,*ja; 991 Mat_MPIAIJ *b = NULL; 992 993 PetscFunctionBegin; 994 ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 995 if (!flg) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix"); 996 997 ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 998 if (flg) { /* dense B */ 999 if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution"); 1000 mumps->id.ICNTL(20)= 0; /* dense RHS */ 1001 } else { /* sparse B */ 1002 ierr = PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);CHKERRQ(ierr); 1003 if (flgT) { /* input B is transpose of actural RHS matrix, 1004 because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */ 1005 ierr = MatTransposeGetMat(B,&Bt);CHKERRQ(ierr); 1006 } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix"); 1007 mumps->id.ICNTL(20)= 1; /* sparse RHS */ 1008 } 1009 1010 ierr = MatGetSize(B,&M,&nrhs);CHKERRQ(ierr); 1011 mumps->id.nrhs = nrhs; 1012 mumps->id.lrhs = M; 1013 mumps->id.rhs = NULL; 1014 1015 if (mumps->petsc_size == 1) { 1016 PetscScalar *aa; 1017 PetscInt spnr,*ia,*ja; 1018 PetscBool second_solve = PETSC_FALSE; 1019 1020 ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr); 1021 mumps->id.rhs = (MumpsScalar*)array; 1022 1023 if (!Bt) { /* dense B */ 1024 /* copy B to X */ 1025 ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr); 1026 ierr = PetscArraycpy(array,rbray,M*nrhs);CHKERRQ(ierr); 1027 ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr); 1028 } else { /* sparse B */ 1029 ierr = MatSeqAIJGetArray(Bt,&aa);CHKERRQ(ierr); 1030 ierr = MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1031 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 1032 /* mumps requires ia and ja start at 1! */ 1033 mumps->id.irhs_ptr = ia; 1034 mumps->id.irhs_sparse = ja; 1035 mumps->id.nz_rhs = ia[spnr] - 1; 1036 mumps->id.rhs_sparse = (MumpsScalar*)aa; 1037 } 1038 /* handle condensation step of Schur complement (if any) */ 1039 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1040 second_solve = PETSC_TRUE; 1041 ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr); 1042 } 1043 /* solve phase */ 1044 /*-------------*/ 1045 mumps->id.job = JOB_SOLVE; 1046 PetscMUMPS_c(mumps); 1047 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1048 1049 /* handle expansion step of Schur complement (if any) */ 1050 if (second_solve) { 1051 ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr); 1052 } 1053 if (Bt) { /* sparse B */ 1054 ierr = MatSeqAIJRestoreArray(Bt,&aa);CHKERRQ(ierr); 1055 ierr = MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1056 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 1057 } 1058 ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr); 1059 PetscFunctionReturn(0); 1060 } 1061 1062 /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/ 1063 if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n"); 1064 1065 /* create msol_loc to hold mumps local solution */ 1066 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1067 sol_loc_save = (PetscScalar*)mumps->id.sol_loc; 1068 1069 lsol_loc = mumps->id.lsol_loc; 1070 nlsol_loc = nrhs*lsol_loc; /* length of sol_loc */ 1071 ierr = PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);CHKERRQ(ierr); 1072 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1073 mumps->id.isol_loc = isol_loc; 1074 1075 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);CHKERRQ(ierr); 1076 1077 if (!Bt) { /* dense B */ 1078 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1079 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1080 0, re-arrange B into desired order, which is a local operation. 1081 */ 1082 1083 /* scatter v_mpi to b_seq because MUMPS only supports centralized rhs */ 1084 /* wrap dense rhs matrix B into a vector v_mpi */ 1085 ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr); 1086 ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr); 1087 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr); 1088 ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr); 1089 1090 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */ 1091 if (!mumps->myid) { 1092 PetscInt *idx; 1093 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1094 ierr = PetscMalloc1(nrhs*M,&idx);CHKERRQ(ierr); 1095 ierr = MatGetOwnershipRanges(B,&rstart);CHKERRQ(ierr); 1096 k = 0; 1097 for (proc=0; proc<mumps->petsc_size; proc++){ 1098 for (j=0; j<nrhs; j++){ 1099 for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i; 1100 } 1101 } 1102 1103 ierr = VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);CHKERRQ(ierr); 1104 ierr = ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr); 1105 ierr = ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);CHKERRQ(ierr); 1106 } else { 1107 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);CHKERRQ(ierr); 1108 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);CHKERRQ(ierr); 1109 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);CHKERRQ(ierr); 1110 } 1111 ierr = VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);CHKERRQ(ierr); 1112 ierr = VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1113 ierr = ISDestroy(&is_to);CHKERRQ(ierr); 1114 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1115 ierr = VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1116 1117 if (!mumps->myid) { /* define rhs on the host */ 1118 ierr = VecGetArray(b_seq,&bray);CHKERRQ(ierr); 1119 mumps->id.rhs = (MumpsScalar*)bray; 1120 ierr = VecRestoreArray(b_seq,&bray);CHKERRQ(ierr); 1121 } 1122 1123 } else { /* sparse B */ 1124 b = (Mat_MPIAIJ*)Bt->data; 1125 1126 /* wrap dense X into a vector v_mpi */ 1127 ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr); 1128 ierr = MatDenseGetArray(X,&bray);CHKERRQ(ierr); 1129 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr); 1130 ierr = MatDenseRestoreArray(X,&bray);CHKERRQ(ierr); 1131 1132 if (!mumps->myid) { 1133 ierr = MatSeqAIJGetArray(b->A,&aa);CHKERRQ(ierr); 1134 ierr = MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1135 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 1136 /* mumps requires ia and ja start at 1! */ 1137 mumps->id.irhs_ptr = ia; 1138 mumps->id.irhs_sparse = ja; 1139 mumps->id.nz_rhs = ia[spnr] - 1; 1140 mumps->id.rhs_sparse = (MumpsScalar*)aa; 1141 } else { 1142 mumps->id.irhs_ptr = NULL; 1143 mumps->id.irhs_sparse = NULL; 1144 mumps->id.nz_rhs = 0; 1145 mumps->id.rhs_sparse = NULL; 1146 } 1147 } 1148 1149 /* solve phase */ 1150 /*-------------*/ 1151 mumps->id.job = JOB_SOLVE; 1152 PetscMUMPS_c(mumps); 1153 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1154 1155 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1156 ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr); 1157 ierr = VecPlaceArray(v_mpi,array);CHKERRQ(ierr); 1158 1159 /* create scatter scat_sol */ 1160 ierr = MatGetOwnershipRanges(X,&rstart);CHKERRQ(ierr); 1161 /* iidx: index for scatter mumps solution to petsc X */ 1162 1163 ierr = ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);CHKERRQ(ierr); 1164 ierr = PetscMalloc1(nlsol_loc,&idxx);CHKERRQ(ierr); 1165 for (i=0; i<lsol_loc; i++) { 1166 isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1167 1168 for (proc=0; proc<mumps->petsc_size; proc++){ 1169 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) { 1170 myrstart = rstart[proc]; 1171 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1172 iidx = k + myrstart*nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1173 m = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */ 1174 break; 1175 } 1176 } 1177 1178 for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m; 1179 } 1180 ierr = ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr); 1181 ierr = VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);CHKERRQ(ierr); 1182 ierr = VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1183 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1184 ierr = ISDestroy(&is_to);CHKERRQ(ierr); 1185 ierr = VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1186 ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr); 1187 1188 /* free spaces */ 1189 mumps->id.sol_loc = (MumpsScalar*)sol_loc_save; 1190 mumps->id.isol_loc = isol_loc_save; 1191 1192 ierr = PetscFree2(sol_loc,isol_loc);CHKERRQ(ierr); 1193 ierr = PetscFree(idxx);CHKERRQ(ierr); 1194 ierr = VecDestroy(&msol_loc);CHKERRQ(ierr); 1195 ierr = VecDestroy(&v_mpi);CHKERRQ(ierr); 1196 if (Bt) { 1197 if (!mumps->myid) { 1198 b = (Mat_MPIAIJ*)Bt->data; 1199 ierr = MatSeqAIJRestoreArray(b->A,&aa);CHKERRQ(ierr); 1200 ierr = MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1201 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 1202 } 1203 } else { 1204 ierr = VecDestroy(&b_seq);CHKERRQ(ierr); 1205 ierr = VecScatterDestroy(&scat_rhs);CHKERRQ(ierr); 1206 } 1207 ierr = VecScatterDestroy(&scat_sol);CHKERRQ(ierr); 1208 ierr = PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));CHKERRQ(ierr); 1209 PetscFunctionReturn(0); 1210 } 1211 1212 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X) 1213 { 1214 PetscErrorCode ierr; 1215 PetscBool flg; 1216 Mat B; 1217 1218 PetscFunctionBegin; 1219 ierr = PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 1220 if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix"); 1221 1222 /* Create B=Bt^T that uses Bt's data structure */ 1223 ierr = MatCreateTranspose(Bt,&B);CHKERRQ(ierr); 1224 1225 ierr = MatMatSolve_MUMPS(A,B,X);CHKERRQ(ierr); 1226 ierr = MatDestroy(&B);CHKERRQ(ierr); 1227 PetscFunctionReturn(0); 1228 } 1229 1230 #if !defined(PETSC_USE_COMPLEX) 1231 /* 1232 input: 1233 F: numeric factor 1234 output: 1235 nneg: total number of negative pivots 1236 nzero: total number of zero pivots 1237 npos: (global dimension of F) - nneg - nzero 1238 */ 1239 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos) 1240 { 1241 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1242 PetscErrorCode ierr; 1243 PetscMPIInt size; 1244 1245 PetscFunctionBegin; 1246 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRQ(ierr); 1247 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1248 if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",mumps->id.INFOG(13)); 1249 1250 if (nneg) *nneg = mumps->id.INFOG(12); 1251 if (nzero || npos) { 1252 if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1253 if (nzero) *nzero = mumps->id.INFOG(28); 1254 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1255 } 1256 PetscFunctionReturn(0); 1257 } 1258 #endif 1259 1260 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps) 1261 { 1262 PetscErrorCode ierr; 1263 PetscInt i,nz=0,*irn,*jcn=0; 1264 PetscScalar *val=0; 1265 PetscMPIInt mpinz,*recvcount=NULL,*displs=NULL; 1266 1267 PetscFunctionBegin; 1268 if (mumps->omp_comm_size > 1) { 1269 if (reuse == MAT_INITIAL_MATRIX) { 1270 /* master first gathers counts of nonzeros to receive */ 1271 if (mumps->is_omp_master) { ierr = PetscMalloc2(mumps->omp_comm_size,&recvcount,mumps->omp_comm_size,&displs);CHKERRQ(ierr); } 1272 ierr = PetscMPIIntCast(mumps->nz,&mpinz);CHKERRQ(ierr); 1273 ierr = MPI_Gather(&mpinz,1,MPI_INT,recvcount,1,MPI_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1274 1275 /* master allocates memory to receive nonzeros */ 1276 if (mumps->is_omp_master) { 1277 displs[0] = 0; 1278 for (i=1; i<mumps->omp_comm_size; i++) displs[i] = displs[i-1] + recvcount[i-1]; 1279 nz = displs[mumps->omp_comm_size-1] + recvcount[mumps->omp_comm_size-1]; 1280 ierr = PetscMalloc(2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar),&irn);CHKERRQ(ierr); 1281 jcn = irn + nz; 1282 val = (PetscScalar*)(jcn + nz); 1283 } 1284 1285 /* save the gatherv plan */ 1286 mumps->mpinz = mpinz; /* used as send count */ 1287 mumps->recvcount = recvcount; 1288 mumps->displs = displs; 1289 1290 /* master gathers nonzeros */ 1291 ierr = MPI_Gatherv(mumps->irn,mpinz,MPIU_INT,irn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1292 ierr = MPI_Gatherv(mumps->jcn,mpinz,MPIU_INT,jcn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1293 ierr = MPI_Gatherv(mumps->val,mpinz,MPIU_SCALAR,val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1294 1295 /* master frees its row/col/val and replaces them with bigger arrays */ 1296 if (mumps->is_omp_master) { 1297 ierr = PetscFree(mumps->irn);CHKERRQ(ierr); /* irn/jcn/val are allocated together so free only irn */ 1298 mumps->nz = nz; /* it is a sum of mpinz over omp_comm */ 1299 mumps->irn = irn; 1300 mumps->jcn = jcn; 1301 mumps->val = val; 1302 } 1303 } else { 1304 ierr = MPI_Gatherv((mumps->is_omp_master?MPI_IN_PLACE:mumps->val),mumps->mpinz,MPIU_SCALAR,mumps->val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1305 } 1306 } 1307 PetscFunctionReturn(0); 1308 } 1309 1310 PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info) 1311 { 1312 Mat_MUMPS *mumps =(Mat_MUMPS*)(F)->data; 1313 PetscErrorCode ierr; 1314 PetscBool isMPIAIJ; 1315 1316 PetscFunctionBegin; 1317 if (mumps->id.INFOG(1) < 0) { 1318 if (mumps->id.INFOG(1) == -6) { 1319 ierr = PetscInfo2(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1320 } 1321 ierr = PetscInfo2(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1322 PetscFunctionReturn(0); 1323 } 1324 1325 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1326 ierr = MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);CHKERRQ(ierr); 1327 1328 /* numerical factorization phase */ 1329 /*-------------------------------*/ 1330 mumps->id.job = JOB_FACTNUMERIC; 1331 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1332 if (!mumps->myid) { 1333 mumps->id.a = (MumpsScalar*)mumps->val; 1334 } 1335 } else { 1336 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1337 } 1338 PetscMUMPS_c(mumps); 1339 if (mumps->id.INFOG(1) < 0) { 1340 if (A->erroriffailure) { 1341 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 1342 } else { 1343 if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */ 1344 ierr = PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1345 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1346 } else if (mumps->id.INFOG(1) == -13) { 1347 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1348 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1349 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10) ) { 1350 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray \n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1351 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1352 } else { 1353 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1354 F->factorerrortype = MAT_FACTOR_OTHER; 1355 } 1356 } 1357 } 1358 if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d\n",mumps->id.INFOG(16)); 1359 1360 F->assembled = PETSC_TRUE; 1361 mumps->matstruc = SAME_NONZERO_PATTERN; 1362 if (F->schur) { /* reset Schur status to unfactored */ 1363 #if defined(PETSC_HAVE_CUDA) 1364 F->schur->valid_GPU_matrix = PETSC_OFFLOAD_CPU; 1365 #endif 1366 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1367 mumps->id.ICNTL(19) = 2; 1368 ierr = MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);CHKERRQ(ierr); 1369 } 1370 ierr = MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);CHKERRQ(ierr); 1371 } 1372 1373 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 1374 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 1375 1376 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 1377 if (mumps->petsc_size > 1) { 1378 PetscInt lsol_loc; 1379 PetscScalar *sol_loc; 1380 1381 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr); 1382 1383 /* distributed solution; Create x_seq=sol_loc for repeated use */ 1384 if (mumps->x_seq) { 1385 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 1386 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 1387 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 1388 } 1389 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1390 ierr = PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);CHKERRQ(ierr); 1391 mumps->id.lsol_loc = lsol_loc; 1392 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1393 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr); 1394 } 1395 ierr = PetscLogFlops(mumps->id.RINFO(2));CHKERRQ(ierr); 1396 PetscFunctionReturn(0); 1397 } 1398 1399 /* Sets MUMPS options from the options database */ 1400 PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A) 1401 { 1402 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1403 PetscErrorCode ierr; 1404 PetscInt icntl,info[80],i,ninfo=80; 1405 PetscBool flg; 1406 1407 PetscFunctionBegin; 1408 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 1409 ierr = PetscOptionsInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr); 1410 if (flg) mumps->id.ICNTL(1) = icntl; 1411 ierr = PetscOptionsInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr); 1412 if (flg) mumps->id.ICNTL(2) = icntl; 1413 ierr = PetscOptionsInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr); 1414 if (flg) mumps->id.ICNTL(3) = icntl; 1415 1416 ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr); 1417 if (flg) mumps->id.ICNTL(4) = icntl; 1418 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 1419 1420 ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr); 1421 if (flg) mumps->id.ICNTL(6) = icntl; 1422 1423 ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr); 1424 if (flg) { 1425 if (icntl== 1 && mumps->petsc_size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n"); 1426 else mumps->id.ICNTL(7) = icntl; 1427 } 1428 1429 ierr = PetscOptionsInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr); 1430 /* ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL);CHKERRQ(ierr); handled by MatSolveTranspose_MUMPS() */ 1431 ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr); 1432 ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr); 1433 ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr); 1434 ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr); 1435 ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr); 1436 ierr = PetscOptionsInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr); 1437 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 1438 ierr = MatDestroy(&F->schur);CHKERRQ(ierr); 1439 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 1440 } 1441 /* ierr = PetscOptionsInt("-mat_mumps_icntl_20","ICNTL(20): the format (dense or sparse) of the right-hand sides","None",mumps->id.ICNTL(20),&mumps->id.ICNTL(20),NULL);CHKERRQ(ierr); -- sparse rhs is not supported in PETSc API */ 1442 /* ierr = PetscOptionsInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL);CHKERRQ(ierr); we only use distributed solution vector */ 1443 1444 ierr = PetscOptionsInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr); 1445 ierr = PetscOptionsInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr); 1446 ierr = PetscOptionsInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr); 1447 if (mumps->id.ICNTL(24)) { 1448 mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ 1449 } 1450 1451 ierr = PetscOptionsInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr); 1452 ierr = PetscOptionsInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr); 1453 ierr = PetscOptionsInt("-mat_mumps_icntl_27","ICNTL(27): the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr); 1454 ierr = PetscOptionsInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr); 1455 ierr = PetscOptionsInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr); 1456 /* ierr = PetscOptionsInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */ 1457 ierr = PetscOptionsInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr); 1458 /* ierr = PetscOptionsInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr); -- not supported by PETSc API */ 1459 ierr = PetscOptionsInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr); 1460 ierr = PetscOptionsInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr); 1461 ierr = PetscOptionsInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);CHKERRQ(ierr); 1462 ierr = PetscOptionsInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);CHKERRQ(ierr); 1463 1464 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr); 1465 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr); 1466 ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr); 1467 ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr); 1468 ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr); 1469 ierr = PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);CHKERRQ(ierr); 1470 1471 ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, 256, NULL);CHKERRQ(ierr); 1472 1473 ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr); 1474 if (ninfo) { 1475 if (ninfo > 80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %d must <= 80\n",ninfo); 1476 ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr); 1477 mumps->ninfo = ninfo; 1478 for (i=0; i<ninfo; i++) { 1479 if (info[i] < 0 || info[i]>80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %d must between 1 and 80\n",ninfo); 1480 else mumps->info[i] = info[i]; 1481 } 1482 } 1483 1484 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1485 PetscFunctionReturn(0); 1486 } 1487 1488 PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps) 1489 { 1490 PetscErrorCode ierr; 1491 PetscInt nthreads=0; 1492 1493 PetscFunctionBegin; 1494 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1495 ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRQ(ierr); 1496 ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRQ(ierr); /* so that code like "if (!myid)" still works even if mumps_comm is different */ 1497 1498 ierr = PetscOptionsHasName(NULL,NULL,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr); 1499 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1500 ierr = PetscOptionsGetInt(NULL,NULL,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr); 1501 if (mumps->use_petsc_omp_support) { 1502 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1503 ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr); 1504 ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr); 1505 #else 1506 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -mat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual\n"); 1507 #endif 1508 } else { 1509 mumps->omp_comm = PETSC_COMM_SELF; 1510 mumps->mumps_comm = mumps->petsc_comm; 1511 mumps->is_omp_master = PETSC_TRUE; 1512 } 1513 ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRQ(ierr); 1514 1515 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 1516 mumps->id.job = JOB_INIT; 1517 mumps->id.par = 1; /* host participates factorizaton and solve */ 1518 mumps->id.sym = mumps->sym; 1519 1520 PetscMUMPS_c(mumps); 1521 1522 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 1523 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 1524 */ 1525 ierr = MPI_Bcast(mumps->id.icntl,60,MPIU_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 Manual Section 9 */ 1526 ierr = MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); 1527 1528 mumps->scat_rhs = NULL; 1529 mumps->scat_sol = NULL; 1530 1531 /* set PETSc-MUMPS default options - override MUMPS default */ 1532 mumps->id.ICNTL(3) = 0; 1533 mumps->id.ICNTL(4) = 0; 1534 if (mumps->petsc_size == 1) { 1535 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 1536 } else { 1537 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 1538 mumps->id.ICNTL(20) = 0; /* rhs is in dense format */ 1539 mumps->id.ICNTL(21) = 1; /* distributed solution */ 1540 } 1541 1542 /* schur */ 1543 mumps->id.size_schur = 0; 1544 mumps->id.listvar_schur = NULL; 1545 mumps->id.schur = NULL; 1546 mumps->sizeredrhs = 0; 1547 mumps->schur_sol = NULL; 1548 mumps->schur_sizesol = 0; 1549 PetscFunctionReturn(0); 1550 } 1551 1552 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps) 1553 { 1554 PetscErrorCode ierr; 1555 1556 PetscFunctionBegin; 1557 ierr = MPI_Bcast(mumps->id.infog, 80,MPIU_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 manual p82 */ 1558 ierr = MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); 1559 if (mumps->id.INFOG(1) < 0) { 1560 if (A->erroriffailure) { 1561 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1562 } else { 1563 if (mumps->id.INFOG(1) == -6) { 1564 ierr = PetscInfo2(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1565 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 1566 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 1567 ierr = PetscInfo2(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1568 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1569 } else { 1570 ierr = PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1571 F->factorerrortype = MAT_FACTOR_OTHER; 1572 } 1573 } 1574 } 1575 PetscFunctionReturn(0); 1576 } 1577 1578 /* Note Petsc r(=c) permutation is used when mumps->id.ICNTL(7)==1 with centralized assembled matrix input; otherwise r and c are ignored */ 1579 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1580 { 1581 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1582 PetscErrorCode ierr; 1583 Vec b; 1584 const PetscInt M = A->rmap->N; 1585 1586 PetscFunctionBegin; 1587 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1588 1589 /* Set MUMPS options from the options database */ 1590 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1591 1592 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1593 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1594 1595 /* analysis phase */ 1596 /*----------------*/ 1597 mumps->id.job = JOB_FACTSYMBOLIC; 1598 mumps->id.n = M; 1599 switch (mumps->id.ICNTL(18)) { 1600 case 0: /* centralized assembled matrix input */ 1601 if (!mumps->myid) { 1602 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1603 if (mumps->id.ICNTL(6)>1) { 1604 mumps->id.a = (MumpsScalar*)mumps->val; 1605 } 1606 if (mumps->id.ICNTL(7) == 1) { /* use user-provide matrix ordering - assuming r = c ordering */ 1607 /* 1608 PetscBool flag; 1609 ierr = ISEqual(r,c,&flag);CHKERRQ(ierr); 1610 if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"row_perm != col_perm"); 1611 ierr = ISView(r,PETSC_VIEWER_STDOUT_SELF); 1612 */ 1613 if (!mumps->myid) { 1614 const PetscInt *idx; 1615 PetscInt i,*perm_in; 1616 1617 ierr = PetscMalloc1(M,&perm_in);CHKERRQ(ierr); 1618 ierr = ISGetIndices(r,&idx);CHKERRQ(ierr); 1619 1620 mumps->id.perm_in = perm_in; 1621 for (i=0; i<M; i++) perm_in[i] = idx[i]+1; /* perm_in[]: start from 1, not 0! */ 1622 ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr); 1623 } 1624 } 1625 } 1626 break; 1627 case 3: /* distributed assembled matrix input (size>1) */ 1628 mumps->id.nz_loc = mumps->nz; 1629 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1630 if (mumps->id.ICNTL(6)>1) { 1631 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1632 } 1633 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1634 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1635 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1636 ierr = VecDestroy(&b);CHKERRQ(ierr); 1637 break; 1638 } 1639 PetscMUMPS_c(mumps); 1640 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1641 1642 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1643 F->ops->solve = MatSolve_MUMPS; 1644 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1645 F->ops->matsolve = MatMatSolve_MUMPS; 1646 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1647 PetscFunctionReturn(0); 1648 } 1649 1650 /* Note the Petsc r and c permutations are ignored */ 1651 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1652 { 1653 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1654 PetscErrorCode ierr; 1655 Vec b; 1656 const PetscInt M = A->rmap->N; 1657 1658 PetscFunctionBegin; 1659 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1660 1661 /* Set MUMPS options from the options database */ 1662 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1663 1664 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1665 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1666 1667 /* analysis phase */ 1668 /*----------------*/ 1669 mumps->id.job = JOB_FACTSYMBOLIC; 1670 mumps->id.n = M; 1671 switch (mumps->id.ICNTL(18)) { 1672 case 0: /* centralized assembled matrix input */ 1673 if (!mumps->myid) { 1674 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1675 if (mumps->id.ICNTL(6)>1) { 1676 mumps->id.a = (MumpsScalar*)mumps->val; 1677 } 1678 } 1679 break; 1680 case 3: /* distributed assembled matrix input (size>1) */ 1681 mumps->id.nz_loc = mumps->nz; 1682 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1683 if (mumps->id.ICNTL(6)>1) { 1684 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1685 } 1686 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1687 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1688 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1689 ierr = VecDestroy(&b);CHKERRQ(ierr); 1690 break; 1691 } 1692 PetscMUMPS_c(mumps); 1693 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1694 1695 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1696 F->ops->solve = MatSolve_MUMPS; 1697 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1698 PetscFunctionReturn(0); 1699 } 1700 1701 /* Note the Petsc r permutation and factor info are ignored */ 1702 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info) 1703 { 1704 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1705 PetscErrorCode ierr; 1706 Vec b; 1707 const PetscInt M = A->rmap->N; 1708 1709 PetscFunctionBegin; 1710 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1711 1712 /* Set MUMPS options from the options database */ 1713 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1714 1715 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1716 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1717 1718 /* analysis phase */ 1719 /*----------------*/ 1720 mumps->id.job = JOB_FACTSYMBOLIC; 1721 mumps->id.n = M; 1722 switch (mumps->id.ICNTL(18)) { 1723 case 0: /* centralized assembled matrix input */ 1724 if (!mumps->myid) { 1725 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1726 if (mumps->id.ICNTL(6)>1) { 1727 mumps->id.a = (MumpsScalar*)mumps->val; 1728 } 1729 } 1730 break; 1731 case 3: /* distributed assembled matrix input (size>1) */ 1732 mumps->id.nz_loc = mumps->nz; 1733 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1734 if (mumps->id.ICNTL(6)>1) { 1735 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1736 } 1737 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1738 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1739 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1740 ierr = VecDestroy(&b);CHKERRQ(ierr); 1741 break; 1742 } 1743 PetscMUMPS_c(mumps); 1744 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1745 1746 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 1747 F->ops->solve = MatSolve_MUMPS; 1748 F->ops->solvetranspose = MatSolve_MUMPS; 1749 F->ops->matsolve = MatMatSolve_MUMPS; 1750 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1751 #if defined(PETSC_USE_COMPLEX) 1752 F->ops->getinertia = NULL; 1753 #else 1754 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 1755 #endif 1756 PetscFunctionReturn(0); 1757 } 1758 1759 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) 1760 { 1761 PetscErrorCode ierr; 1762 PetscBool iascii; 1763 PetscViewerFormat format; 1764 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1765 1766 PetscFunctionBegin; 1767 /* check if matrix is mumps type */ 1768 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0); 1769 1770 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1771 if (iascii) { 1772 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1773 if (format == PETSC_VIEWER_ASCII_INFO) { 1774 ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr); 1775 ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",mumps->id.sym);CHKERRQ(ierr); 1776 ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",mumps->id.par);CHKERRQ(ierr); 1777 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d \n",mumps->id.ICNTL(1));CHKERRQ(ierr); 1778 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d \n",mumps->id.ICNTL(2));CHKERRQ(ierr); 1779 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d \n",mumps->id.ICNTL(3));CHKERRQ(ierr); 1780 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",mumps->id.ICNTL(4));CHKERRQ(ierr); 1781 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",mumps->id.ICNTL(5));CHKERRQ(ierr); 1782 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",mumps->id.ICNTL(6));CHKERRQ(ierr); 1783 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d \n",mumps->id.ICNTL(7));CHKERRQ(ierr); 1784 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d \n",mumps->id.ICNTL(8));CHKERRQ(ierr); 1785 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",mumps->id.ICNTL(10));CHKERRQ(ierr); 1786 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",mumps->id.ICNTL(11));CHKERRQ(ierr); 1787 if (mumps->id.ICNTL(11)>0) { 1788 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr); 1789 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr); 1790 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6));CHKERRQ(ierr); 1791 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr); 1792 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr); 1793 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr); 1794 } 1795 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",mumps->id.ICNTL(12));CHKERRQ(ierr); 1796 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (efficiency control): %d \n",mumps->id.ICNTL(13));CHKERRQ(ierr); 1797 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",mumps->id.ICNTL(14));CHKERRQ(ierr); 1798 /* ICNTL(15-17) not used */ 1799 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",mumps->id.ICNTL(18));CHKERRQ(ierr); 1800 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d \n",mumps->id.ICNTL(19));CHKERRQ(ierr); 1801 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(20) (rhs sparse pattern): %d \n",mumps->id.ICNTL(20));CHKERRQ(ierr); 1802 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d \n",mumps->id.ICNTL(21));CHKERRQ(ierr); 1803 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d \n",mumps->id.ICNTL(22));CHKERRQ(ierr); 1804 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory can be allocated locally):%d \n",mumps->id.ICNTL(23));CHKERRQ(ierr); 1805 1806 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d \n",mumps->id.ICNTL(24));CHKERRQ(ierr); 1807 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d \n",mumps->id.ICNTL(25));CHKERRQ(ierr); 1808 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for rhs or solution): %d \n",mumps->id.ICNTL(26));CHKERRQ(ierr); 1809 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(27) (experimental parameter): %d \n",mumps->id.ICNTL(27));CHKERRQ(ierr); 1810 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d \n",mumps->id.ICNTL(28));CHKERRQ(ierr); 1811 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d \n",mumps->id.ICNTL(29));CHKERRQ(ierr); 1812 1813 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d \n",mumps->id.ICNTL(30));CHKERRQ(ierr); 1814 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(31) (factors is discarded in the solve phase): %d \n",mumps->id.ICNTL(31));CHKERRQ(ierr); 1815 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d \n",mumps->id.ICNTL(33));CHKERRQ(ierr); 1816 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d \n",mumps->id.ICNTL(35));CHKERRQ(ierr); 1817 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(36) (choice of BLR factorization variant): %d \n",mumps->id.ICNTL(36));CHKERRQ(ierr); 1818 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(38) (estimated compression rate of LU factors): %d \n",mumps->id.ICNTL(38));CHKERRQ(ierr); 1819 1820 ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1));CHKERRQ(ierr); 1821 ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr); 1822 ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3));CHKERRQ(ierr); 1823 ierr = PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4));CHKERRQ(ierr); 1824 ierr = PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5));CHKERRQ(ierr); 1825 ierr = PetscViewerASCIIPrintf(viewer," CNTL(7) (dropping parameter for BLR): %g \n",mumps->id.CNTL(7));CHKERRQ(ierr); 1826 1827 /* infomation local to each processor */ 1828 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr); 1829 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1830 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr); 1831 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1832 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr); 1833 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr); 1834 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1835 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr); 1836 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr); 1837 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1838 1839 ierr = PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr); 1840 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr); 1841 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1842 1843 ierr = PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr); 1844 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr); 1845 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1846 1847 ierr = PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr); 1848 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr); 1849 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1850 1851 if (mumps->ninfo && mumps->ninfo <= 80){ 1852 PetscInt i; 1853 for (i=0; i<mumps->ninfo; i++){ 1854 ierr = PetscViewerASCIIPrintf(viewer, " INFO(%d): \n",mumps->info[i]);CHKERRQ(ierr); 1855 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(mumps->info[i]));CHKERRQ(ierr); 1856 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1857 } 1858 } 1859 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1860 1861 if (!mumps->myid) { /* information from the host */ 1862 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr); 1863 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr); 1864 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr); 1865 ierr = PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr); 1866 1867 ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(3));CHKERRQ(ierr); 1868 ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(4));CHKERRQ(ierr); 1869 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",mumps->id.INFOG(5));CHKERRQ(ierr); 1870 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",mumps->id.INFOG(6));CHKERRQ(ierr); 1871 ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively use after analysis): %d \n",mumps->id.INFOG(7));CHKERRQ(ierr); 1872 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",mumps->id.INFOG(8));CHKERRQ(ierr); 1873 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",mumps->id.INFOG(9));CHKERRQ(ierr); 1874 ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d \n",mumps->id.INFOG(10));CHKERRQ(ierr); 1875 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d \n",mumps->id.INFOG(11));CHKERRQ(ierr); 1876 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",mumps->id.INFOG(12));CHKERRQ(ierr); 1877 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",mumps->id.INFOG(13));CHKERRQ(ierr); 1878 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",mumps->id.INFOG(14));CHKERRQ(ierr); 1879 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",mumps->id.INFOG(15));CHKERRQ(ierr); 1880 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",mumps->id.INFOG(16));CHKERRQ(ierr); 1881 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",mumps->id.INFOG(17));CHKERRQ(ierr); 1882 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",mumps->id.INFOG(18));CHKERRQ(ierr); 1883 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",mumps->id.INFOG(19));CHKERRQ(ierr); 1884 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",mumps->id.INFOG(20));CHKERRQ(ierr); 1885 ierr = PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",mumps->id.INFOG(21));CHKERRQ(ierr); 1886 ierr = PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",mumps->id.INFOG(22));CHKERRQ(ierr); 1887 ierr = PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",mumps->id.INFOG(23));CHKERRQ(ierr); 1888 ierr = PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",mumps->id.INFOG(24));CHKERRQ(ierr); 1889 ierr = PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",mumps->id.INFOG(25));CHKERRQ(ierr); 1890 ierr = PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));CHKERRQ(ierr); 1891 ierr = PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));CHKERRQ(ierr); 1892 ierr = PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));CHKERRQ(ierr); 1893 ierr = PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));CHKERRQ(ierr); 1894 ierr = PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));CHKERRQ(ierr); 1895 ierr = PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));CHKERRQ(ierr); 1896 ierr = PetscViewerASCIIPrintf(viewer," INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));CHKERRQ(ierr); 1897 ierr = PetscViewerASCIIPrintf(viewer," INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(36));CHKERRQ(ierr); 1898 ierr = PetscViewerASCIIPrintf(viewer," INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d \n",mumps->id.INFOG(37));CHKERRQ(ierr); 1899 ierr = PetscViewerASCIIPrintf(viewer," INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(38));CHKERRQ(ierr); 1900 ierr = PetscViewerASCIIPrintf(viewer," INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d \n",mumps->id.INFOG(39));CHKERRQ(ierr); 1901 } 1902 } 1903 } 1904 PetscFunctionReturn(0); 1905 } 1906 1907 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info) 1908 { 1909 Mat_MUMPS *mumps =(Mat_MUMPS*)A->data; 1910 1911 PetscFunctionBegin; 1912 info->block_size = 1.0; 1913 info->nz_allocated = mumps->id.INFOG(20); 1914 info->nz_used = mumps->id.INFOG(20); 1915 info->nz_unneeded = 0.0; 1916 info->assemblies = 0.0; 1917 info->mallocs = 0.0; 1918 info->memory = 0.0; 1919 info->fill_ratio_given = 0; 1920 info->fill_ratio_needed = 0; 1921 info->factor_mallocs = 0; 1922 PetscFunctionReturn(0); 1923 } 1924 1925 /* -------------------------------------------------------------------------------------------*/ 1926 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 1927 { 1928 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1929 const PetscInt *idxs; 1930 PetscInt size,i; 1931 PetscErrorCode ierr; 1932 1933 PetscFunctionBegin; 1934 ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr); 1935 if (mumps->petsc_size > 1) { 1936 PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */ 1937 1938 ls = mumps->myid ? (size ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 1939 ierr = MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);CHKERRQ(ierr); 1940 if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc\n"); 1941 } 1942 if (mumps->id.size_schur != size) { 1943 ierr = PetscFree2(mumps->id.listvar_schur,mumps->id.schur);CHKERRQ(ierr); 1944 mumps->id.size_schur = size; 1945 mumps->id.schur_lld = size; 1946 ierr = PetscMalloc2(size,&mumps->id.listvar_schur,size*size,&mumps->id.schur);CHKERRQ(ierr); 1947 } 1948 1949 /* Schur complement matrix */ 1950 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.size_schur,(PetscScalar*)mumps->id.schur,&F->schur);CHKERRQ(ierr); 1951 if (mumps->sym == 1) { 1952 ierr = MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 1953 } 1954 1955 /* MUMPS expects Fortran style indices */ 1956 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 1957 ierr = PetscArraycpy(mumps->id.listvar_schur,idxs,size);CHKERRQ(ierr); 1958 for (i=0;i<size;i++) mumps->id.listvar_schur[i]++; 1959 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 1960 if (mumps->petsc_size > 1) { 1961 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 1962 } else { 1963 if (F->factortype == MAT_FACTOR_LU) { 1964 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 1965 } else { 1966 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 1967 } 1968 } 1969 /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 1970 mumps->id.ICNTL(26) = -1; 1971 PetscFunctionReturn(0); 1972 } 1973 1974 /* -------------------------------------------------------------------------------------------*/ 1975 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S) 1976 { 1977 Mat St; 1978 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1979 PetscScalar *array; 1980 #if defined(PETSC_USE_COMPLEX) 1981 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 1982 #endif 1983 PetscErrorCode ierr; 1984 1985 PetscFunctionBegin; 1986 if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 1987 ierr = MatCreate(PETSC_COMM_SELF,&St);CHKERRQ(ierr); 1988 ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);CHKERRQ(ierr); 1989 ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr); 1990 ierr = MatSetUp(St);CHKERRQ(ierr); 1991 ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr); 1992 if (!mumps->sym) { /* MUMPS always return a full matrix */ 1993 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1994 PetscInt i,j,N=mumps->id.size_schur; 1995 for (i=0;i<N;i++) { 1996 for (j=0;j<N;j++) { 1997 #if !defined(PETSC_USE_COMPLEX) 1998 PetscScalar val = mumps->id.schur[i*N+j]; 1999 #else 2000 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2001 #endif 2002 array[j*N+i] = val; 2003 } 2004 } 2005 } else { /* stored by columns */ 2006 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 2007 } 2008 } else { /* either full or lower-triangular (not packed) */ 2009 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2010 PetscInt i,j,N=mumps->id.size_schur; 2011 for (i=0;i<N;i++) { 2012 for (j=i;j<N;j++) { 2013 #if !defined(PETSC_USE_COMPLEX) 2014 PetscScalar val = mumps->id.schur[i*N+j]; 2015 #else 2016 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2017 #endif 2018 array[i*N+j] = val; 2019 array[j*N+i] = val; 2020 } 2021 } 2022 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2023 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 2024 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2025 PetscInt i,j,N=mumps->id.size_schur; 2026 for (i=0;i<N;i++) { 2027 for (j=0;j<i+1;j++) { 2028 #if !defined(PETSC_USE_COMPLEX) 2029 PetscScalar val = mumps->id.schur[i*N+j]; 2030 #else 2031 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2032 #endif 2033 array[i*N+j] = val; 2034 array[j*N+i] = val; 2035 } 2036 } 2037 } 2038 } 2039 ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr); 2040 *S = St; 2041 PetscFunctionReturn(0); 2042 } 2043 2044 /* -------------------------------------------------------------------------------------------*/ 2045 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival) 2046 { 2047 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2048 2049 PetscFunctionBegin; 2050 mumps->id.ICNTL(icntl) = ival; 2051 PetscFunctionReturn(0); 2052 } 2053 2054 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival) 2055 { 2056 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2057 2058 PetscFunctionBegin; 2059 *ival = mumps->id.ICNTL(icntl); 2060 PetscFunctionReturn(0); 2061 } 2062 2063 /*@ 2064 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2065 2066 Logically Collective on Mat 2067 2068 Input Parameters: 2069 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2070 . icntl - index of MUMPS parameter array ICNTL() 2071 - ival - value of MUMPS ICNTL(icntl) 2072 2073 Options Database: 2074 . -mat_mumps_icntl_<icntl> <ival> 2075 2076 Level: beginner 2077 2078 References: 2079 . MUMPS Users' Guide 2080 2081 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2082 @*/ 2083 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival) 2084 { 2085 PetscErrorCode ierr; 2086 2087 PetscFunctionBegin; 2088 PetscValidType(F,1); 2089 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2090 PetscValidLogicalCollectiveInt(F,icntl,2); 2091 PetscValidLogicalCollectiveInt(F,ival,3); 2092 ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr); 2093 PetscFunctionReturn(0); 2094 } 2095 2096 /*@ 2097 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2098 2099 Logically Collective on Mat 2100 2101 Input Parameters: 2102 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2103 - icntl - index of MUMPS parameter array ICNTL() 2104 2105 Output Parameter: 2106 . ival - value of MUMPS ICNTL(icntl) 2107 2108 Level: beginner 2109 2110 References: 2111 . MUMPS Users' Guide 2112 2113 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2114 @*/ 2115 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival) 2116 { 2117 PetscErrorCode ierr; 2118 2119 PetscFunctionBegin; 2120 PetscValidType(F,1); 2121 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2122 PetscValidLogicalCollectiveInt(F,icntl,2); 2123 PetscValidIntPointer(ival,3); 2124 ierr = PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2125 PetscFunctionReturn(0); 2126 } 2127 2128 /* -------------------------------------------------------------------------------------------*/ 2129 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val) 2130 { 2131 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2132 2133 PetscFunctionBegin; 2134 mumps->id.CNTL(icntl) = val; 2135 PetscFunctionReturn(0); 2136 } 2137 2138 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val) 2139 { 2140 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2141 2142 PetscFunctionBegin; 2143 *val = mumps->id.CNTL(icntl); 2144 PetscFunctionReturn(0); 2145 } 2146 2147 /*@ 2148 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2149 2150 Logically Collective on Mat 2151 2152 Input Parameters: 2153 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2154 . icntl - index of MUMPS parameter array CNTL() 2155 - val - value of MUMPS CNTL(icntl) 2156 2157 Options Database: 2158 . -mat_mumps_cntl_<icntl> <val> 2159 2160 Level: beginner 2161 2162 References: 2163 . MUMPS Users' Guide 2164 2165 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2166 @*/ 2167 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val) 2168 { 2169 PetscErrorCode ierr; 2170 2171 PetscFunctionBegin; 2172 PetscValidType(F,1); 2173 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2174 PetscValidLogicalCollectiveInt(F,icntl,2); 2175 PetscValidLogicalCollectiveReal(F,val,3); 2176 ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr); 2177 PetscFunctionReturn(0); 2178 } 2179 2180 /*@ 2181 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2182 2183 Logically Collective on Mat 2184 2185 Input Parameters: 2186 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2187 - icntl - index of MUMPS parameter array CNTL() 2188 2189 Output Parameter: 2190 . val - value of MUMPS CNTL(icntl) 2191 2192 Level: beginner 2193 2194 References: 2195 . MUMPS Users' Guide 2196 2197 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2198 @*/ 2199 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val) 2200 { 2201 PetscErrorCode ierr; 2202 2203 PetscFunctionBegin; 2204 PetscValidType(F,1); 2205 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2206 PetscValidLogicalCollectiveInt(F,icntl,2); 2207 PetscValidRealPointer(val,3); 2208 ierr = PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2209 PetscFunctionReturn(0); 2210 } 2211 2212 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info) 2213 { 2214 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2215 2216 PetscFunctionBegin; 2217 *info = mumps->id.INFO(icntl); 2218 PetscFunctionReturn(0); 2219 } 2220 2221 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog) 2222 { 2223 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2224 2225 PetscFunctionBegin; 2226 *infog = mumps->id.INFOG(icntl); 2227 PetscFunctionReturn(0); 2228 } 2229 2230 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo) 2231 { 2232 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2233 2234 PetscFunctionBegin; 2235 *rinfo = mumps->id.RINFO(icntl); 2236 PetscFunctionReturn(0); 2237 } 2238 2239 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog) 2240 { 2241 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2242 2243 PetscFunctionBegin; 2244 *rinfog = mumps->id.RINFOG(icntl); 2245 PetscFunctionReturn(0); 2246 } 2247 2248 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS) 2249 { 2250 PetscErrorCode ierr; 2251 Mat Bt = NULL,Btseq = NULL; 2252 PetscBool flg; 2253 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2254 PetscScalar *aa; 2255 PetscInt spnr,*ia,*ja; 2256 2257 PetscFunctionBegin; 2258 PetscValidIntPointer(spRHS,2); 2259 ierr = PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);CHKERRQ(ierr); 2260 if (flg) { 2261 ierr = MatTransposeGetMat(spRHS,&Bt);CHKERRQ(ierr); 2262 } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be type MATTRANSPOSEMAT matrix"); 2263 2264 ierr = MatMumpsSetIcntl(F,30,1);CHKERRQ(ierr); 2265 2266 if (mumps->petsc_size > 1) { 2267 Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data; 2268 Btseq = b->A; 2269 } else { 2270 Btseq = Bt; 2271 } 2272 2273 if (!mumps->myid) { 2274 ierr = MatSeqAIJGetArray(Btseq,&aa);CHKERRQ(ierr); 2275 ierr = MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2276 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2277 2278 mumps->id.irhs_ptr = ia; 2279 mumps->id.irhs_sparse = ja; 2280 mumps->id.nz_rhs = ia[spnr] - 1; 2281 mumps->id.rhs_sparse = (MumpsScalar*)aa; 2282 } else { 2283 mumps->id.irhs_ptr = NULL; 2284 mumps->id.irhs_sparse = NULL; 2285 mumps->id.nz_rhs = 0; 2286 mumps->id.rhs_sparse = NULL; 2287 } 2288 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2289 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2290 2291 /* solve phase */ 2292 /*-------------*/ 2293 mumps->id.job = JOB_SOLVE; 2294 PetscMUMPS_c(mumps); 2295 if (mumps->id.INFOG(1) < 0) 2296 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 2297 2298 if (!mumps->myid) { 2299 ierr = MatSeqAIJRestoreArray(Btseq,&aa);CHKERRQ(ierr); 2300 ierr = MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2301 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2302 } 2303 PetscFunctionReturn(0); 2304 } 2305 2306 /*@ 2307 MatMumpsGetInverse - Get user-specified set of entries in inverse of A 2308 2309 Logically Collective on Mat 2310 2311 Input Parameters: 2312 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2313 - spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0] 2314 2315 Output Parameter: 2316 . spRHS - requested entries of inverse of A 2317 2318 Level: beginner 2319 2320 References: 2321 . MUMPS Users' Guide 2322 2323 .seealso: MatGetFactor(), MatCreateTranspose() 2324 @*/ 2325 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS) 2326 { 2327 PetscErrorCode ierr; 2328 2329 PetscFunctionBegin; 2330 PetscValidType(F,1); 2331 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2332 ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr); 2333 PetscFunctionReturn(0); 2334 } 2335 2336 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST) 2337 { 2338 PetscErrorCode ierr; 2339 Mat spRHS; 2340 2341 PetscFunctionBegin; 2342 ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr); 2343 ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr); 2344 ierr = MatDestroy(&spRHS);CHKERRQ(ierr); 2345 PetscFunctionReturn(0); 2346 } 2347 2348 /*@ 2349 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T 2350 2351 Logically Collective on Mat 2352 2353 Input Parameters: 2354 + F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface 2355 - spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0] 2356 2357 Output Parameter: 2358 . spRHST - requested entries of inverse of A^T 2359 2360 Level: beginner 2361 2362 References: 2363 . MUMPS Users' Guide 2364 2365 .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse() 2366 @*/ 2367 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST) 2368 { 2369 PetscErrorCode ierr; 2370 PetscBool flg; 2371 2372 PetscFunctionBegin; 2373 PetscValidType(F,1); 2374 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2375 ierr = PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 2376 if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix"); 2377 2378 ierr = PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));CHKERRQ(ierr); 2379 PetscFunctionReturn(0); 2380 } 2381 2382 /*@ 2383 MatMumpsGetInfo - Get MUMPS parameter INFO() 2384 2385 Logically Collective on Mat 2386 2387 Input Parameters: 2388 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2389 - icntl - index of MUMPS parameter array INFO() 2390 2391 Output Parameter: 2392 . ival - value of MUMPS INFO(icntl) 2393 2394 Level: beginner 2395 2396 References: 2397 . MUMPS Users' Guide 2398 2399 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2400 @*/ 2401 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival) 2402 { 2403 PetscErrorCode ierr; 2404 2405 PetscFunctionBegin; 2406 PetscValidType(F,1); 2407 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2408 PetscValidIntPointer(ival,3); 2409 ierr = PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2410 PetscFunctionReturn(0); 2411 } 2412 2413 /*@ 2414 MatMumpsGetInfog - Get MUMPS parameter INFOG() 2415 2416 Logically Collective on Mat 2417 2418 Input Parameters: 2419 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2420 - icntl - index of MUMPS parameter array INFOG() 2421 2422 Output Parameter: 2423 . ival - value of MUMPS INFOG(icntl) 2424 2425 Level: beginner 2426 2427 References: 2428 . MUMPS Users' Guide 2429 2430 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2431 @*/ 2432 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival) 2433 { 2434 PetscErrorCode ierr; 2435 2436 PetscFunctionBegin; 2437 PetscValidType(F,1); 2438 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2439 PetscValidIntPointer(ival,3); 2440 ierr = PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2441 PetscFunctionReturn(0); 2442 } 2443 2444 /*@ 2445 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 2446 2447 Logically Collective on Mat 2448 2449 Input Parameters: 2450 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2451 - icntl - index of MUMPS parameter array RINFO() 2452 2453 Output Parameter: 2454 . val - value of MUMPS RINFO(icntl) 2455 2456 Level: beginner 2457 2458 References: 2459 . MUMPS Users' Guide 2460 2461 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2462 @*/ 2463 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val) 2464 { 2465 PetscErrorCode ierr; 2466 2467 PetscFunctionBegin; 2468 PetscValidType(F,1); 2469 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2470 PetscValidRealPointer(val,3); 2471 ierr = PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2472 PetscFunctionReturn(0); 2473 } 2474 2475 /*@ 2476 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 2477 2478 Logically Collective on Mat 2479 2480 Input Parameters: 2481 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2482 - icntl - index of MUMPS parameter array RINFOG() 2483 2484 Output Parameter: 2485 . val - value of MUMPS RINFOG(icntl) 2486 2487 Level: beginner 2488 2489 References: 2490 . MUMPS Users' Guide 2491 2492 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2493 @*/ 2494 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val) 2495 { 2496 PetscErrorCode ierr; 2497 2498 PetscFunctionBegin; 2499 PetscValidType(F,1); 2500 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2501 PetscValidRealPointer(val,3); 2502 ierr = PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2503 PetscFunctionReturn(0); 2504 } 2505 2506 /*MC 2507 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 2508 distributed and sequential matrices via the external package MUMPS. 2509 2510 Works with MATAIJ and MATSBAIJ matrices 2511 2512 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 2513 2514 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below. 2515 2516 Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver 2517 2518 Options Database Keys: 2519 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 2520 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 2521 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 2522 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 2523 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 2524 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis 2525 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 2526 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 2527 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 2528 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 2529 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 2530 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 2531 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 2532 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 2533 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can allocate per processor 2534 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 2535 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 2536 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix 2537 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering 2538 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 2539 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 2540 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 2541 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 2542 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 2543 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 2544 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 2545 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 2546 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 2547 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 2548 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 2549 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 2550 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 2551 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 2552 Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 2553 2554 Level: beginner 2555 2556 Notes: 2557 MUMPS Cholesky does not handle (complex) Hermitian matrices http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf so using it will error if the matrix is Hermitian. 2558 2559 When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling 2560 $ KSPGetPC(ksp,&pc); 2561 $ PCFactorGetMatrix(pc,&mat); 2562 $ MatMumpsGetInfo(mat,....); 2563 $ MatMumpsGetInfog(mat,....); etc. 2564 Or you can run with -ksp_error_if_not_converged and the program will be stopped and the information printed in the error message. 2565 2566 Two modes to run MUMPS/PETSc with OpenMP 2567 2568 $ Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 2569 $ threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". 2570 2571 $ -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example, 2572 $ if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 2573 2574 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 2575 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc 2576 (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 2577 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 2578 (PETSc will automatically try to utilized a threaded BLAS if --with-openmp is provided). 2579 2580 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 2581 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 2582 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 2583 are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set 2584 by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs. 2585 In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets, 2586 if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind 2587 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half 2588 cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 2589 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 2590 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 2591 examine the mapping result. 2592 2593 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts, 2594 for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc 2595 calls omp_set_num_threads(m) internally before calling MUMPS. 2596 2597 References: 2598 + 1. - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 2599 - 2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017. 2600 2601 .seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCGetFactor(), PCFactorGetMatrix() 2602 2603 M*/ 2604 2605 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type) 2606 { 2607 PetscFunctionBegin; 2608 *type = MATSOLVERMUMPS; 2609 PetscFunctionReturn(0); 2610 } 2611 2612 /* MatGetFactor for Seq and MPI AIJ matrices */ 2613 static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F) 2614 { 2615 Mat B; 2616 PetscErrorCode ierr; 2617 Mat_MUMPS *mumps; 2618 PetscBool isSeqAIJ; 2619 2620 PetscFunctionBegin; 2621 /* Create the factorization matrix */ 2622 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr); 2623 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2624 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2625 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2626 ierr = MatSetUp(B);CHKERRQ(ierr); 2627 2628 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2629 2630 B->ops->view = MatView_MUMPS; 2631 B->ops->getinfo = MatGetInfo_MUMPS; 2632 2633 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2634 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2635 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2636 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2637 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2638 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2639 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2640 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2641 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2642 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2643 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2644 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2645 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2646 2647 if (ftype == MAT_FACTOR_LU) { 2648 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 2649 B->factortype = MAT_FACTOR_LU; 2650 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 2651 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 2652 mumps->sym = 0; 2653 } else { 2654 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 2655 B->factortype = MAT_FACTOR_CHOLESKY; 2656 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 2657 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 2658 #if defined(PETSC_USE_COMPLEX) 2659 mumps->sym = 2; 2660 #else 2661 if (A->spd_set && A->spd) mumps->sym = 1; 2662 else mumps->sym = 2; 2663 #endif 2664 } 2665 2666 /* set solvertype */ 2667 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2668 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2669 2670 B->ops->destroy = MatDestroy_MUMPS; 2671 B->data = (void*)mumps; 2672 2673 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2674 2675 *F = B; 2676 PetscFunctionReturn(0); 2677 } 2678 2679 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 2680 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F) 2681 { 2682 Mat B; 2683 PetscErrorCode ierr; 2684 Mat_MUMPS *mumps; 2685 PetscBool isSeqSBAIJ; 2686 2687 PetscFunctionBegin; 2688 if (ftype != MAT_FACTOR_CHOLESKY) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Cannot use PETSc SBAIJ matrices with MUMPS LU, use AIJ matrix"); 2689 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr); 2690 /* Create the factorization matrix */ 2691 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2692 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2693 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2694 ierr = MatSetUp(B);CHKERRQ(ierr); 2695 2696 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2697 if (isSeqSBAIJ) { 2698 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 2699 } else { 2700 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 2701 } 2702 2703 B->ops->getinfo = MatGetInfo_External; 2704 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 2705 B->ops->view = MatView_MUMPS; 2706 2707 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2708 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2709 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2710 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2711 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2712 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2713 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2714 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2715 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2716 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2717 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2718 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2719 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2720 2721 B->factortype = MAT_FACTOR_CHOLESKY; 2722 #if defined(PETSC_USE_COMPLEX) 2723 mumps->sym = 2; 2724 #else 2725 if (A->spd_set && A->spd) mumps->sym = 1; 2726 else mumps->sym = 2; 2727 #endif 2728 2729 /* set solvertype */ 2730 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2731 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2732 2733 B->ops->destroy = MatDestroy_MUMPS; 2734 B->data = (void*)mumps; 2735 2736 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2737 2738 *F = B; 2739 PetscFunctionReturn(0); 2740 } 2741 2742 static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F) 2743 { 2744 Mat B; 2745 PetscErrorCode ierr; 2746 Mat_MUMPS *mumps; 2747 PetscBool isSeqBAIJ; 2748 2749 PetscFunctionBegin; 2750 /* Create the factorization matrix */ 2751 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr); 2752 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2753 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2754 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2755 ierr = MatSetUp(B);CHKERRQ(ierr); 2756 2757 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2758 if (ftype == MAT_FACTOR_LU) { 2759 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 2760 B->factortype = MAT_FACTOR_LU; 2761 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 2762 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 2763 mumps->sym = 0; 2764 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead\n"); 2765 2766 B->ops->getinfo = MatGetInfo_External; 2767 B->ops->view = MatView_MUMPS; 2768 2769 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2770 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2771 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2772 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2773 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2774 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2775 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2776 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2777 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2778 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2779 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2780 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2781 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2782 2783 /* set solvertype */ 2784 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2785 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2786 2787 B->ops->destroy = MatDestroy_MUMPS; 2788 B->data = (void*)mumps; 2789 2790 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2791 2792 *F = B; 2793 PetscFunctionReturn(0); 2794 } 2795 2796 /* MatGetFactor for Seq and MPI SELL matrices */ 2797 static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F) 2798 { 2799 Mat B; 2800 PetscErrorCode ierr; 2801 Mat_MUMPS *mumps; 2802 PetscBool isSeqSELL; 2803 2804 PetscFunctionBegin; 2805 /* Create the factorization matrix */ 2806 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);CHKERRQ(ierr); 2807 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2808 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2809 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2810 ierr = MatSetUp(B);CHKERRQ(ierr); 2811 2812 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2813 2814 B->ops->view = MatView_MUMPS; 2815 B->ops->getinfo = MatGetInfo_MUMPS; 2816 2817 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2818 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2819 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2820 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2821 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2822 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2823 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2824 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2825 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2826 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2827 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2828 2829 if (ftype == MAT_FACTOR_LU) { 2830 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 2831 B->factortype = MAT_FACTOR_LU; 2832 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 2833 else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 2834 mumps->sym = 0; 2835 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 2836 2837 /* set solvertype */ 2838 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2839 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2840 2841 B->ops->destroy = MatDestroy_MUMPS; 2842 B->data = (void*)mumps; 2843 2844 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2845 2846 *F = B; 2847 PetscFunctionReturn(0); 2848 } 2849 2850 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 2851 { 2852 PetscErrorCode ierr; 2853 2854 PetscFunctionBegin; 2855 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2856 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2857 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2858 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2859 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 2860 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2861 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2862 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2863 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2864 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 2865 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);CHKERRQ(ierr); 2866 PetscFunctionReturn(0); 2867 } 2868 2869