/*
    Provides an interface to the MUMPS sparse solver
*/

#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>

/* Pick the MUMPS header matching PETSc's scalar type: c/z = single/double complex, s/d = single/double real */
EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#include <cmumps_c.h>
#else
#include <zmumps_c.h>
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#include <smumps_c.h>
#else
#include <dmumps_c.h>
#endif
#endif
EXTERN_C_END

/* MUMPS job codes passed in id.job to select the phase to run */
#define JOB_INIT -1
#define JOB_FACTSYMBOLIC 1
#define JOB_FACTNUMERIC 2
#define JOB_SOLVE 3
#define JOB_END -2

/* calls to MUMPS: single entry point resolved to the precision-specific MUMPS driver */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define PetscMUMPS_c cmumps_c
#else
#define PetscMUMPS_c zmumps_c
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#define PetscMUMPS_c smumps_c
#else
#define PetscMUMPS_c dmumps_c
#endif
#endif

/* macros s.t. indices match MUMPS documentation (MUMPS arrays are documented 1-based) */
#define ICNTL(I) icntl[(I)-1]
#define CNTL(I) cntl[(I)-1]
#define INFOG(I) infog[(I)-1]
#define INFO(I) info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I) rinfo[(I)-1]

/* Per-factorization context stored in F->spptr */
typedef struct {
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;
#else
  ZMUMPS_STRUC_C id;
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
#else
  DMUMPS_STRUC_C id;
#endif
#endif

  MatStructure matstruc;
  PetscMPIInt  myid,size;          /* rank and size of comm_mumps */
  PetscInt     *irn,*jcn,nz,sym;   /* triple (row,col) indices and local nonzero count; sym passed to id.sym */
  PetscScalar  *val;               /* triple values; sometimes points into irn's allocation, sometimes into the matrix */
  MPI_Comm     comm_mumps;         /* private dup of the matrix communicator */
  VecScatter   scat_rhs, scat_sol; /* scatters for centralized rhs / distributed solution */
  PetscBool    isAIJ,CleanUpMUMPS; /* CleanUpMUMPS: a factorization exists and must be JOB_END'ed on destroy */
  Vec          b_seq,x_seq;
  PetscInt     ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */

  PetscErrorCode (*Destroy)(Mat);
  PetscErrorCode (*ConvertToTriples)(Mat, int, MatReuse, int*, int**, int**, PetscScalar**);
} Mat_MUMPS;

extern PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*);
/* MatConvertToTriples_A_B */
/*convert Petsc matrix to triples: row[nz], col[nz], val[nz] */
/*
  input:
    A       - matrix in aij,baij or sbaij (bs=1) format
    shift   - 0: C style output triple; 1: Fortran style output triple.
    reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
              MAT_REUSE_MATRIX:   only the values in v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and col index, matrix values (matrix triples)

  The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
  freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means
  that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
*/

#undef __FUNCT__
#define __FUNCT__ "MatConvertToTriples_seqaij_seqaij"
/* SeqAIJ -> MUMPS triples: v aliases the matrix values directly (no copy), so only
   the index arrays need building on MAT_INITIAL_MATRIX; MAT_REUSE_MATRIX is a no-op
   apart from refreshing *v. row and col share one allocation (col = row + nz). */
PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscInt *ai,*aj,*ajj,M=A->rmap->n;
  PetscInt       nz,rnz,i,j;
  PetscErrorCode ierr;
  PetscInt       *row,*col;
  Mat_SeqAIJ     *aa=(Mat_SeqAIJ*)A->data;

  PetscFunctionBegin;
  *v = aa->a;   /* values are used in place; CSR storage order matches the triple order below */
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz;
    ai   = aa->i;
    aj   = aa->j;
    *nnz = nz;
    ierr = PetscMalloc(2*nz*sizeof(PetscInt), &row);CHKERRQ(ierr);
    col  = row + nz;

    nz = 0;
    for (i=0; i<M; i++) {
      rnz = ai[i+1] - ai[i];   /* nonzeros in row i */
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        row[nz] = i+shift; col[nz++] = ajj[j] + shift;
      }
    }
    *r = row; *c = col;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatConvertToTriples_seqbaij_seqaij"
/* SeqBAIJ (block size bs) -> pointwise MUMPS triples: each bs x bs block is expanded
   entry-by-entry in the block's column-major storage order so v can alias aa->a. */
PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  Mat_SeqBAIJ    *aa=(Mat_SeqBAIJ*)A->data;
  const PetscInt
*ai,*aj,*ajj,bs=A->rmap->bs,bs2=aa->bs2,M=A->rmap->N/bs; 143 PetscInt nz,idx=0,rnz,i,j,k,m; 144 PetscErrorCode ierr; 145 PetscInt *row,*col; 146 147 PetscFunctionBegin; 148 *v = aa->a; 149 if (reuse == MAT_INITIAL_MATRIX) { 150 ai = aa->i; aj = aa->j; 151 nz = bs2*aa->nz; 152 *nnz = nz; 153 ierr = PetscMalloc(2*nz*sizeof(PetscInt), &row);CHKERRQ(ierr); 154 col = row + nz; 155 156 for (i=0; i<M; i++) { 157 ajj = aj + ai[i]; 158 rnz = ai[i+1] - ai[i]; 159 for (k=0; k<rnz; k++) { 160 for (j=0; j<bs; j++) { 161 for (m=0; m<bs; m++) { 162 row[idx] = i*bs + m + shift; 163 col[idx++] = bs*(ajj[k]) + j + shift; 164 } 165 } 166 } 167 } 168 *r = row; *c = col; 169 } 170 PetscFunctionReturn(0); 171 } 172 173 #undef __FUNCT__ 174 #define __FUNCT__ "MatConvertToTriples_seqsbaij_seqsbaij" 175 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 176 { 177 const PetscInt *ai, *aj,*ajj,M=A->rmap->n; 178 PetscInt nz,rnz,i,j; 179 PetscErrorCode ierr; 180 PetscInt *row,*col; 181 Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)A->data; 182 183 PetscFunctionBegin; 184 *v = aa->a; 185 if (reuse == MAT_INITIAL_MATRIX) { 186 nz = aa->nz; 187 ai = aa->i; 188 aj = aa->j; 189 *v = aa->a; 190 *nnz = nz; 191 ierr = PetscMalloc(2*nz*sizeof(PetscInt), &row);CHKERRQ(ierr); 192 col = row + nz; 193 194 nz = 0; 195 for (i=0; i<M; i++) { 196 rnz = ai[i+1] - ai[i]; 197 ajj = aj + ai[i]; 198 for (j=0; j<rnz; j++) { 199 row[nz] = i+shift; col[nz++] = ajj[j] + shift; 200 } 201 } 202 *r = row; *c = col; 203 } 204 PetscFunctionReturn(0); 205 } 206 207 #undef __FUNCT__ 208 #define __FUNCT__ "MatConvertToTriples_seqaij_seqsbaij" 209 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 210 { 211 const PetscInt *ai,*aj,*ajj,*adiag,M=A->rmap->n; 212 PetscInt nz,rnz,i,j; 213 const PetscScalar *av,*v1; 214 PetscScalar *val; 215 PetscErrorCode ierr; 216 PetscInt *row,*col; 217 
Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)A->data; 218 219 PetscFunctionBegin; 220 ai =aa->i; aj=aa->j;av=aa->a; 221 adiag=aa->diag; 222 if (reuse == MAT_INITIAL_MATRIX) { 223 nz = M + (aa->nz-M)/2; 224 *nnz = nz; 225 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 226 col = row + nz; 227 val = (PetscScalar*)(col + nz); 228 229 nz = 0; 230 for (i=0; i<M; i++) { 231 rnz = ai[i+1] - adiag[i]; 232 ajj = aj + adiag[i]; 233 v1 = av + adiag[i]; 234 for (j=0; j<rnz; j++) { 235 row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j]; 236 } 237 } 238 *r = row; *c = col; *v = val; 239 } else { 240 nz = 0; val = *v; 241 for (i=0; i <M; i++) { 242 rnz = ai[i+1] - adiag[i]; 243 ajj = aj + adiag[i]; 244 v1 = av + adiag[i]; 245 for (j=0; j<rnz; j++) { 246 val[nz++] = v1[j]; 247 } 248 } 249 } 250 PetscFunctionReturn(0); 251 } 252 253 #undef __FUNCT__ 254 #define __FUNCT__ "MatConvertToTriples_mpisbaij_mpisbaij" 255 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 256 { 257 const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 258 PetscErrorCode ierr; 259 PetscInt rstart,nz,i,j,jj,irow,countA,countB; 260 PetscInt *row,*col; 261 const PetscScalar *av, *bv,*v1,*v2; 262 PetscScalar *val; 263 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data; 264 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)(mat->A)->data; 265 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 266 267 PetscFunctionBegin; 268 ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap->rstart; 269 av=aa->a; bv=bb->a; 270 271 garray = mat->garray; 272 273 if (reuse == MAT_INITIAL_MATRIX) { 274 nz = aa->nz + bb->nz; 275 *nnz = nz; 276 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 277 col = row + nz; 278 val = (PetscScalar*)(col + nz); 279 280 *r = row; *c = col; *v = val; 281 } else { 282 row = *r; col = *c; val = *v; 283 } 284 285 jj = 0; irow = rstart; 286 for (i=0; i<m; i++) { 
    ajj = aj + ai[i];                 /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj = bj + bi[i];
    v1 = av + ai[i];
    v2 = bv + bi[i];

    /* A-part: local diagonal block, columns offset by rstart to become global */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
      }
      val[jj++] = v1[j];
    }

    /* B-part: off-diagonal block, columns mapped to global via garray */
    for (j=0; j < countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatConvertToTriples_mpiaij_mpiaij"
/* MPIAIJ -> distributed MUMPS triples: same A-part/B-part walk as the mpisbaij
   converter above, but over the full (unsymmetric) local rows. Indices are built
   once on MAT_INITIAL_MATRIX; values are recopied on every call. */
PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt          rstart,nz,i,j,jj,irow,countA,countB;
  PetscInt          *row,*col;
  const PetscScalar *av, *bv,*v1,*v2;
  PetscScalar       *val;
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ        *aa  = (Mat_SeqAIJ*)(mat->A)->data;
  Mat_SeqAIJ        *bb  = (Mat_SeqAIJ*)(mat->B)->data;

  PetscFunctionBegin;
  ai = aa->i; aj = aa->j; bi = bb->i; bj = bb->j; rstart = A->rmap->rstart;
  av = aa->a; bv = bb->a;

  garray = mat->garray;   /* local-to-global column map for the B part */

  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz + bb->nz;
    *nnz = nz;
    /* single allocation: [row | col | val] */
    ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr);
    col  = row + nz;
    val  = (PetscScalar*)(col + nz);

    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj = aj + ai[i];                 /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj = bj + bi[i];
    v1 = av + ai[i];
    v2 = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j=0; j < countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatConvertToTriples_mpibaij_mpiaij"
/* MPIBAIJ (block size bs) -> pointwise distributed MUMPS triples: each bs x bs block
   of both the diagonal (A) and off-diagonal (B) parts is expanded entry-by-entry in
   the block's column-major storage order (n fastest), mirroring the seqbaij converter. */
PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ*)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
  const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj;
  const PetscInt    *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart;
  const PetscInt    bs = A->rmap->bs,bs2=mat->bs2;
  PetscErrorCode    ierr;
  PetscInt          nz,i,j,k,n,jj,irow,countA,countB,idx;
  PetscInt          *row,*col;
  const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2;
  PetscScalar       *val;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = bs2*(aa->nz + bb->nz);   /* pointwise count: bs^2 entries per block */
    *nnz = nz;
    ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr);
    col  = row + nz;
    val  = (PetscScalar*)(col + nz);

    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }

  jj = 0; irow = rstart;
  for (i=0; i<mbs; i++) {
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    ajj = aj + ai[i];
    bjj = bj + bi[i];
    v1 = av + bs2*ai[i];   /* values are stored bs2 per block */
    v2 = bv + bs2*bi[i];

    idx = 0;
    /* A-part */
    for (k=0; k<countA; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            row[jj] = irow + n + shift;
            col[jj] = rstart + bs*ajj[k] + j + shift;
          }
          val[jj++] = v1[idx++];
        }
      }
    }

    idx = 0;
    /* B-part */
    for (k=0; k<countB; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            row[jj] = irow + n + shift;
            col[jj] = bs*garray[bjj[k]] + j + shift;
          }
          val[jj++] = v2[idx++];
        }
      }
    }
    irow += bs;   /* next block-row starts bs pointwise rows later */
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatConvertToTriples_mpiaij_mpisbaij"
/* MPIAIJ -> upper-triangular distributed MUMPS triples for the symmetric solver:
   keeps the diagonal-and-above part of the local A block (via aa->diag) and only
   those B-block entries whose global column exceeds rstart.
   NOTE(review): the B-part filter uses "> rstart" rather than ">= rstart + local row";
   presumably correct because B holds only off-process columns -- confirm against the
   MPIAIJ column-partitioning invariants. */
PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscInt    *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt          rstart,nz,nza,nzb,i,j,jj,irow,countA,countB;
  PetscInt          *row,*col;
  const PetscScalar *av, *bv,*v1,*v2;
  PetscScalar       *val;
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ        *aa  =(Mat_SeqAIJ*)(mat->A)->data;
  Mat_SeqAIJ        *bb  =(Mat_SeqAIJ*)(mat->B)->data;

  PetscFunctionBegin;
  ai = aa->i; aj = aa->j; adiag = aa->diag;
  bi = bb->i; bj = bb->j; garray = mat->garray;
  av = aa->a; bv = bb->a;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nza = 0;    /* num of upper triangular entries in mat->A, including diagonals */
    nzb = 0;    /* num of upper triangular entries in mat->B */
    for (i=0; i<m; i++) {
      nza   += (ai[i+1] - adiag[i]);
      countB = bi[i+1] - bi[i];
      bjj    = bj + bi[i];
      for (j=0; j<countB; j++) {
        if (garray[bjj[j]] > rstart) nzb++;
      }
    }

    nz   = nza + nzb; /* total nz of upper triangular part of mat */
    *nnz = nz;
    ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr);
    col  = row + nz;
    val  = (PetscScalar*)(col + nz);

    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj = aj + adiag[i];                 /* ptr to the beginning of the diagonal of this row */
    v1  = av + adiag[i];
    countA = ai[i+1] - adiag[i];
    countB = bi[i+1] - bi[i];
    bjj = bj + bi[i];
    v2  = bv + bi[i];

    /* A-part: diagonal and above only (walk starts at adiag[i]) */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
      }
      val[jj++] = v1[j];
    }

    /* B-part: keep only columns in the upper triangle (global col > rstart) */
    for (j=0; j < countB; j++) {
      if (garray[bjj[j]] > rstart) {
        if (reuse == MAT_INITIAL_MATRIX) {
          row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
        }
        val[jj++] = v2[j];
      }
    }
    irow++;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MUMPS"
/* Destroys the MUMPS factor: terminates the MUMPS instance (JOB_END), releases the
   triple/scatter/vector resources, chains to the base Destroy, and removes the
   composed query functions. */
PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->spptr;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (mumps->CleanUpMUMPS) {
    /* Terminate instance, deallocate memories */
    ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr);
    ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr);
    ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr);
    ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
    ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr);
    ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr);
    ierr = PetscFree(mumps->irn);CHKERRQ(ierr);   /* frees row/col (and sometimes val), see converter note above */

    mumps->id.job = JOB_END;
    PetscMUMPS_c(&mumps->id);
    ierr = MPI_Comm_free(&(mumps->comm_mumps));CHKERRQ(ierr);
  }
  if (mumps->Destroy) {
    ierr = (mumps->Destroy)(A);CHKERRQ(ierr);
  }
  ierr = PetscFree(A->spptr);CHKERRQ(ierr);

  /* clear composed functions */
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverPackage_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSolve_MUMPS"
/* Solves A x = b with the MUMPS factor: centralizes b on rank 0 (MUMPS requires a
   centralized rhs), runs JOB_SOLVE, then scatters the distributed solution back. */
PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS
 *mumps=(Mat_MUMPS*)A->spptr;
  PetscScalar    *array;
  Vec            b_seq;
  IS             is_iden,is_petsc;
  PetscErrorCode ierr;
  PetscInt       i;
  static PetscBool cite1 = PETSC_FALSE,cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1);CHKERRQ(ierr);
  ierr = PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",&cite2);CHKERRQ(ierr);
  mumps->id.nrhs = 1;
  b_seq          = mumps->b_seq;
  if (mumps->size > 1) {
    /* MUMPS only supports centralized rhs. Scatter b into a sequential rhs vector */
    ierr = VecScatterBegin(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mumps->myid) {ierr = VecGetArray(b_seq,&array);CHKERRQ(ierr);}
  } else {  /* size == 1: solve in place in x (b copied there first) */
    ierr = VecCopy(b,x);CHKERRQ(ierr);
    ierr = VecGetArray(x,&array);CHKERRQ(ierr);
  }
  if (!mumps->myid) { /* define rhs on the host */
    mumps->id.nrhs = 1;
    /* cast PetscScalar* to the MUMPS scalar type matching the included header */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
    mumps->id.rhs = (mumps_complex*)array;
#else
    mumps->id.rhs = (mumps_double_complex*)array;
#endif
#else
    mumps->id.rhs = array;
#endif
  }

  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(&mumps->id);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

  if (mumps->size > 1) { /* convert mumps distributed solution to petsc mpi x */
    if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
      /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */
      ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
    }
    if (!mumps->scat_sol) { /* create scatter scat_sol */
      ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */
      for (i=0; i<mumps->id.lsol_loc; i++) {
        mumps->id.isol_loc[i] -= 1;             /* change Fortran style to C style */
      }
      ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,mumps->id.isol_loc,PETSC_COPY_VALUES,&is_petsc);CHKERRQ(ierr);  /* to */
      ierr = VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr);
      ierr = ISDestroy(&is_iden);CHKERRQ(ierr);
      ierr = ISDestroy(&is_petsc);CHKERRQ(ierr);

      mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save
current value of id.ICNTL(9) */ 615 } 616 617 ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 618 ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 619 } 620 PetscFunctionReturn(0); 621 } 622 623 #undef __FUNCT__ 624 #define __FUNCT__ "MatSolveTranspose_MUMPS" 625 PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x) 626 { 627 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 628 PetscErrorCode ierr; 629 630 PetscFunctionBegin; 631 mumps->id.ICNTL(9) = 0; 632 633 ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr); 634 635 mumps->id.ICNTL(9) = 1; 636 PetscFunctionReturn(0); 637 } 638 639 #undef __FUNCT__ 640 #define __FUNCT__ "MatMatSolve_MUMPS" 641 PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X) 642 { 643 PetscErrorCode ierr; 644 PetscBool flg; 645 646 PetscFunctionBegin; 647 ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 648 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix"); 649 ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 650 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix"); 651 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatSolve_MUMPS() is not implemented yet"); 652 PetscFunctionReturn(0); 653 } 654 655 #if !defined(PETSC_USE_COMPLEX) 656 /* 657 input: 658 F: numeric factor 659 output: 660 nneg: total number of negative pivots 661 nzero: 0 662 npos: (global dimension of F) - nneg 663 */ 664 665 #undef __FUNCT__ 666 #define __FUNCT__ "MatGetInertia_SBAIJMUMPS" 667 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos) 668 { 669 Mat_MUMPS *mumps =(Mat_MUMPS*)F->spptr; 670 PetscErrorCode ierr; 671 PetscMPIInt size; 672 673 PetscFunctionBegin; 674 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRQ(ierr); 675 /* MUMPS 4.3.1 
calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 676 if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",mumps->id.INFOG(13)); 677 if (nneg) { 678 if (!mumps->myid) { 679 *nneg = mumps->id.INFOG(12); 680 } 681 ierr = MPI_Bcast(nneg,1,MPI_INT,0,mumps->comm_mumps);CHKERRQ(ierr); 682 } 683 if (nzero) *nzero = 0; 684 if (npos) *npos = F->rmap->N - (*nneg); 685 PetscFunctionReturn(0); 686 } 687 #endif /* !defined(PETSC_USE_COMPLEX) */ 688 689 #undef __FUNCT__ 690 #define __FUNCT__ "MatFactorNumeric_MUMPS" 691 PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info) 692 { 693 Mat_MUMPS *mumps =(Mat_MUMPS*)(F)->spptr; 694 PetscErrorCode ierr; 695 Mat F_diag; 696 PetscBool isMPIAIJ; 697 698 PetscFunctionBegin; 699 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 700 701 /* numerical factorization phase */ 702 /*-------------------------------*/ 703 mumps->id.job = JOB_FACTNUMERIC; 704 if (!mumps->id.ICNTL(18)) { 705 if (!mumps->myid) { 706 #if defined(PETSC_USE_COMPLEX) 707 #if defined(PETSC_USE_REAL_SINGLE) 708 mumps->id.a = (mumps_complex*)mumps->val; 709 #else 710 mumps->id.a = (mumps_double_complex*)mumps->val; 711 #endif 712 #else 713 mumps->id.a = mumps->val; 714 #endif 715 } 716 } else { 717 #if defined(PETSC_USE_COMPLEX) 718 #if defined(PETSC_USE_REAL_SINGLE) 719 mumps->id.a_loc = (mumps_complex*)mumps->val; 720 #else 721 mumps->id.a_loc = (mumps_double_complex*)mumps->val; 722 #endif 723 #else 724 mumps->id.a_loc = mumps->val; 725 #endif 726 } 727 PetscMUMPS_c(&mumps->id); 728 if (mumps->id.INFOG(1) < 0) { 729 if (mumps->id.INFO(1) == -13) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: 
Cannot allocate required memory %d megabytes\n",mumps->id.INFO(2)); 730 else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFO(1)=%d, INFO(2)=%d\n",mumps->id.INFO(1),mumps->id.INFO(2)); 731 } 732 if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d\n",mumps->id.INFOG(16)); 733 734 if (mumps->size > 1) { 735 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr); 736 if (isMPIAIJ) F_diag = ((Mat_MPIAIJ*)(F)->data)->A; 737 else F_diag = ((Mat_MPISBAIJ*)(F)->data)->A; 738 F_diag->assembled = PETSC_TRUE; 739 if (mumps->scat_sol) { 740 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 741 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 742 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 743 } 744 } 745 (F)->assembled = PETSC_TRUE; 746 mumps->matstruc = SAME_NONZERO_PATTERN; 747 mumps->CleanUpMUMPS = PETSC_TRUE; 748 749 if (mumps->size > 1) { 750 /* distributed solution */ 751 if (!mumps->scat_sol) { 752 /* Create x_seq=sol_loc for repeated use */ 753 PetscInt lsol_loc; 754 PetscScalar *sol_loc; 755 756 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 757 758 ierr = PetscMalloc2(lsol_loc,PetscScalar,&sol_loc,lsol_loc,PetscInt,&mumps->id.isol_loc);CHKERRQ(ierr); 759 760 mumps->id.lsol_loc = lsol_loc; 761 #if defined(PETSC_USE_COMPLEX) 762 #if defined(PETSC_USE_REAL_SINGLE) 763 mumps->id.sol_loc = (mumps_complex*)sol_loc; 764 #else 765 mumps->id.sol_loc = (mumps_double_complex*)sol_loc; 766 #endif 767 #else 768 mumps->id.sol_loc = sol_loc; 769 #endif 770 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr); 771 } 772 } 773 PetscFunctionReturn(0); 774 } 775 776 /* Sets MUMPS options from the options database */ 777 #undef __FUNCT__ 778 #define __FUNCT__ "PetscSetMUMPSFromOptions" 779 PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A) 780 { 781 Mat_MUMPS *mumps = 
(Mat_MUMPS*)F->spptr; 782 PetscErrorCode ierr; 783 PetscInt icntl; 784 PetscBool flg; 785 786 PetscFunctionBegin; 787 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 788 ierr = PetscOptionsInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr); 789 if (flg) mumps->id.ICNTL(1) = icntl; 790 ierr = PetscOptionsInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr); 791 if (flg) mumps->id.ICNTL(2) = icntl; 792 ierr = PetscOptionsInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr); 793 if (flg) mumps->id.ICNTL(3) = icntl; 794 795 ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr); 796 if (flg) mumps->id.ICNTL(4) = icntl; 797 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 798 799 ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): permuting and/or scaling the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr); 800 if (flg) mumps->id.ICNTL(6) = icntl; 801 802 ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): matrix ordering (0 to 7). 
3=Scotch, 4=PORD, 5=Metis","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr); 803 if (flg) { 804 if (icntl== 1 && mumps->size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n"); 805 else mumps->id.ICNTL(7) = icntl; 806 } 807 808 ierr = PetscOptionsInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr); 809 ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr); 810 ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to the linear system solved (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr); 811 ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): efficiency control: defines the ordering strategy with scaling constraints (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr); 812 ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): efficiency control: with or without ScaLAPACK","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr); 813 ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage of estimated workspace increase","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr); 814 ierr = PetscOptionsInt("-mat_mumps_icntl_19","ICNTL(19): Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr); 815 816 ierr = PetscOptionsInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core facility (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr); 817 ierr = PetscOptionsInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr); 818 ierr = PetscOptionsInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 
or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr); 819 if (mumps->id.ICNTL(24)) { 820 mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ 821 } 822 823 ierr = PetscOptionsInt("-mat_mumps_icntl_25","ICNTL(25): computation of a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr); 824 ierr = PetscOptionsInt("-mat_mumps_icntl_26","ICNTL(26): Schur options for right-hand side or solution vector","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr); 825 ierr = PetscOptionsInt("-mat_mumps_icntl_27","ICNTL(27): experimental parameter","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr); 826 ierr = PetscOptionsInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr); 827 ierr = PetscOptionsInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr); 828 ierr = PetscOptionsInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); 829 ierr = PetscOptionsInt("-mat_mumps_icntl_31","ICNTL(31): factors can be discarded in the solve phase","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr); 830 ierr = PetscOptionsInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr); 831 832 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr); 833 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr); 834 ierr = 
PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr); 835 ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr); 836 ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr); 837 838 ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, 256, NULL); 839 PetscOptionsEnd(); 840 PetscFunctionReturn(0); 841 } 842 843 #undef __FUNCT__ 844 #define __FUNCT__ "PetscInitializeMUMPS" 845 PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps) 846 { 847 PetscErrorCode ierr; 848 849 PetscFunctionBegin; 850 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A), &mumps->myid); 851 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&mumps->size);CHKERRQ(ierr); 852 ierr = MPI_Comm_dup(PetscObjectComm((PetscObject)A),&(mumps->comm_mumps));CHKERRQ(ierr); 853 854 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->comm_mumps); 855 856 mumps->id.job = JOB_INIT; 857 mumps->id.par = 1; /* host participates factorizaton and solve */ 858 mumps->id.sym = mumps->sym; 859 PetscMUMPS_c(&mumps->id); 860 861 mumps->CleanUpMUMPS = PETSC_FALSE; 862 mumps->scat_rhs = NULL; 863 mumps->scat_sol = NULL; 864 865 /* set PETSc-MUMPS default options - override MUMPS default */ 866 mumps->id.ICNTL(3) = 0; 867 mumps->id.ICNTL(4) = 0; 868 if (mumps->size == 1) { 869 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 870 } else { 871 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 872 mumps->id.ICNTL(21) = 1; /* distributed solution */ 873 } 874 PetscFunctionReturn(0); 875 } 876 877 /* Note Petsc r(=c) permutation is used when mumps->id.ICNTL(7)==1 with centralized assembled matrix input; otherwise r and c are ignored */ 878 #undef __FUNCT__ 879 
#define __FUNCT__ "MatLUFactorSymbolic_AIJMUMPS" 880 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 881 { 882 Mat_MUMPS *mumps = (Mat_MUMPS*)F->spptr; 883 PetscErrorCode ierr; 884 Vec b; 885 IS is_iden; 886 const PetscInt M = A->rmap->N; 887 888 PetscFunctionBegin; 889 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 890 891 /* Set MUMPS options from the options database */ 892 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 893 894 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 895 896 /* analysis phase */ 897 /*----------------*/ 898 mumps->id.job = JOB_FACTSYMBOLIC; 899 mumps->id.n = M; 900 switch (mumps->id.ICNTL(18)) { 901 case 0: /* centralized assembled matrix input */ 902 if (!mumps->myid) { 903 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 904 if (mumps->id.ICNTL(6)>1) { 905 #if defined(PETSC_USE_COMPLEX) 906 #if defined(PETSC_USE_REAL_SINGLE) 907 mumps->id.a = (mumps_complex*)mumps->val; 908 #else 909 mumps->id.a = (mumps_double_complex*)mumps->val; 910 #endif 911 #else 912 mumps->id.a = mumps->val; 913 #endif 914 } 915 if (mumps->id.ICNTL(7) == 1) { /* use user-provide matrix ordering - assuming r = c ordering */ 916 /* 917 PetscBool flag; 918 ierr = ISEqual(r,c,&flag);CHKERRQ(ierr); 919 if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"row_perm != col_perm"); 920 ierr = ISView(r,PETSC_VIEWER_STDOUT_SELF); 921 */ 922 if (!mumps->myid) { 923 const PetscInt *idx; 924 PetscInt i,*perm_in; 925 926 ierr = PetscMalloc(M*sizeof(PetscInt),&perm_in);CHKERRQ(ierr); 927 ierr = ISGetIndices(r,&idx);CHKERRQ(ierr); 928 929 mumps->id.perm_in = perm_in; 930 for (i=0; i<M; i++) perm_in[i] = idx[i]+1; /* perm_in[]: start from 1, not 0! 
*/ 931 ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr); 932 } 933 } 934 } 935 break; 936 case 3: /* distributed assembled matrix input (size>1) */ 937 mumps->id.nz_loc = mumps->nz; 938 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 939 if (mumps->id.ICNTL(6)>1) { 940 #if defined(PETSC_USE_COMPLEX) 941 #if defined(PETSC_USE_REAL_SINGLE) 942 mumps->id.a_loc = (mumps_complex*)mumps->val; 943 #else 944 mumps->id.a_loc = (mumps_double_complex*)mumps->val; 945 #endif 946 #else 947 mumps->id.a_loc = mumps->val; 948 #endif 949 } 950 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 951 if (!mumps->myid) { 952 ierr = VecCreateSeq(PETSC_COMM_SELF,A->cmap->N,&mumps->b_seq);CHKERRQ(ierr); 953 ierr = ISCreateStride(PETSC_COMM_SELF,A->cmap->N,0,1,&is_iden);CHKERRQ(ierr); 954 } else { 955 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&mumps->b_seq);CHKERRQ(ierr); 956 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_iden);CHKERRQ(ierr); 957 } 958 ierr = VecCreate(PetscObjectComm((PetscObject)A),&b);CHKERRQ(ierr); 959 ierr = VecSetSizes(b,A->rmap->n,PETSC_DECIDE);CHKERRQ(ierr); 960 ierr = VecSetFromOptions(b);CHKERRQ(ierr); 961 962 ierr = VecScatterCreate(b,is_iden,mumps->b_seq,is_iden,&mumps->scat_rhs);CHKERRQ(ierr); 963 ierr = ISDestroy(&is_iden);CHKERRQ(ierr); 964 ierr = VecDestroy(&b);CHKERRQ(ierr); 965 break; 966 } 967 PetscMUMPS_c(&mumps->id); 968 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 969 970 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 971 F->ops->solve = MatSolve_MUMPS; 972 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 973 F->ops->matsolve = 0; /* use MatMatSolve_Basic() until mumps supports distributed rhs */ 974 PetscFunctionReturn(0); 975 } 976 977 /* Note the Petsc r and c permutations are ignored */ 978 #undef __FUNCT__ 979 #define __FUNCT__ "MatLUFactorSymbolic_BAIJMUMPS" 980 PetscErrorCode 
MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  /* Same structure as MatLUFactorSymbolic_AIJMUMPS, minus the ICNTL(7)==1
     user-ordering branch (r and c are ignored for BAIJ input). */
  Mat_MUMPS      *mumps = (Mat_MUMPS*)F->spptr;
  PetscErrorCode ierr;
  Vec            b;
  IS             is_iden;
  const PetscInt M = A->rmap->N;

  PetscFunctionBegin;
  mumps->matstruc = DIFFERENT_NONZERO_PATTERN;

  /* Set MUMPS options from the options database */
  ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr);

  /* shift=1: MUMPS wants Fortran-style (1-based) triples */
  ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr);

  /* analysis phase */
  /*----------------*/
  mumps->id.job = JOB_FACTSYMBOLIC;
  mumps->id.n   = M;
  switch (mumps->id.ICNTL(18)) {
  case 0:  /* centralized assembled matrix input */
    if (!mumps->myid) {
      mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn;
      /* ICNTL(6)>1: MUMPS needs the numerical values already during analysis */
      if (mumps->id.ICNTL(6)>1) {
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
        mumps->id.a = (mumps_complex*)mumps->val;
#else
        mumps->id.a = (mumps_double_complex*)mumps->val;
#endif
#else
        mumps->id.a = mumps->val;
#endif
      }
    }
    break;
  case 3:  /* distributed assembled matrix input (size>1) */
    mumps->id.nz_loc = mumps->nz;
    mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn;
    if (mumps->id.ICNTL(6)>1) {
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
      mumps->id.a_loc = (mumps_complex*)mumps->val;
#else
      mumps->id.a_loc = (mumps_double_complex*)mumps->val;
#endif
#else
      mumps->id.a_loc = mumps->val;
#endif
    }
    /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
    if (!mumps->myid) {
      ierr = VecCreateSeq(PETSC_COMM_SELF,A->cmap->N,&mumps->b_seq);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,A->cmap->N,0,1,&is_iden);CHKERRQ(ierr);
    } else {
      ierr = VecCreateSeq(PETSC_COMM_SELF,0,&mumps->b_seq);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_iden);CHKERRQ(ierr);
    }
    /* b is a template parallel vector used only to build the scatter; destroyed below */
    ierr = VecCreate(PetscObjectComm((PetscObject)A),&b);CHKERRQ(ierr);
    ierr = VecSetSizes(b,A->rmap->n,PETSC_DECIDE);CHKERRQ(ierr);
    ierr = VecSetFromOptions(b);CHKERRQ(ierr);

    ierr = VecScatterCreate(b,is_iden,mumps->b_seq,is_iden,&mumps->scat_rhs);CHKERRQ(ierr);
    ierr = ISDestroy(&is_iden);CHKERRQ(ierr);
    ierr = VecDestroy(&b);CHKERRQ(ierr);
    break;
  }
  PetscMUMPS_c(&mumps->id);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

  F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
  F->ops->solve           = MatSolve_MUMPS;
  F->ops->solvetranspose  = MatSolveTranspose_MUMPS;
  /* NOTE(review): unlike the AIJ/Cholesky variants this does not set F->ops->matsolve = 0;
     confirm whether that asymmetry is intentional */
  PetscFunctionReturn(0);
}

/* Note the Petsc r permutation and factor info are ignored */
#undef __FUNCT__
#define __FUNCT__ "MatCholeskyFactorSymbolic_MUMPS"
/*
   MatCholeskyFactorSymbolic_MUMPS - MUMPS analysis phase for symmetric (Cholesky/LDL^T)
   factorization; same input-distribution logic as the LU variants above.
*/
PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info)
{
  Mat_MUMPS      *mumps = (Mat_MUMPS*)F->spptr;
  PetscErrorCode ierr;
  Vec            b;
  IS             is_iden;
  const PetscInt M = A->rmap->N;

  PetscFunctionBegin;
  mumps->matstruc = DIFFERENT_NONZERO_PATTERN;

  /* Set MUMPS options from the options database */
  ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr);

  /* shift=1: MUMPS wants Fortran-style (1-based) triples */
  ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr);

  /* analysis phase */
  /*----------------*/
  mumps->id.job =
JOB_FACTSYMBOLIC;
  mumps->id.n   = M;
  switch (mumps->id.ICNTL(18)) {
  case 0:  /* centralized assembled matrix input */
    if (!mumps->myid) {
      mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn;
      /* ICNTL(6)>1: MUMPS needs the numerical values already during analysis */
      if (mumps->id.ICNTL(6)>1) {
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
        mumps->id.a = (mumps_complex*)mumps->val;
#else
        mumps->id.a = (mumps_double_complex*)mumps->val;
#endif
#else
        mumps->id.a = mumps->val;
#endif
      }
    }
    break;
  case 3:  /* distributed assembled matrix input (size>1) */
    mumps->id.nz_loc = mumps->nz;
    mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn;
    if (mumps->id.ICNTL(6)>1) {
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
      mumps->id.a_loc = (mumps_complex*)mumps->val;
#else
      mumps->id.a_loc = (mumps_double_complex*)mumps->val;
#endif
#else
      mumps->id.a_loc = mumps->val;
#endif
    }
    /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
    if (!mumps->myid) {
      ierr = VecCreateSeq(PETSC_COMM_SELF,A->cmap->N,&mumps->b_seq);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,A->cmap->N,0,1,&is_iden);CHKERRQ(ierr);
    } else {
      ierr = VecCreateSeq(PETSC_COMM_SELF,0,&mumps->b_seq);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_iden);CHKERRQ(ierr);
    }
    /* b is a template parallel vector used only to build the scatter; destroyed below */
    ierr = VecCreate(PetscObjectComm((PetscObject)A),&b);CHKERRQ(ierr);
    ierr = VecSetSizes(b,A->rmap->n,PETSC_DECIDE);CHKERRQ(ierr);
    ierr = VecSetFromOptions(b);CHKERRQ(ierr);

    ierr = VecScatterCreate(b,is_iden,mumps->b_seq,is_iden,&mumps->scat_rhs);CHKERRQ(ierr);
    ierr = ISDestroy(&is_iden);CHKERRQ(ierr);
    ierr = VecDestroy(&b);CHKERRQ(ierr);
    break;
  }
  PetscMUMPS_c(&mumps->id);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

  F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
  F->ops->solve                 = MatSolve_MUMPS;
  F->ops->solvetranspose        = MatSolve_MUMPS;  /* symmetric factor: A^T x = b is the same solve */
  F->ops->matsolve              = 0;  /* use MatMatSolve_Basic() until mumps supports distributed rhs */
#if !defined(PETSC_USE_COMPLEX)
  F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
#else
  F->ops->getinertia = NULL;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MUMPS"
/*
   MatView_MUMPS - prints the MUMPS control parameters and (with ASCII_INFO format)
   the per-process and host statistics gathered by the last factorization.
*/
PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;
  Mat_MUMPS         *mumps=(Mat_MUMPS*)A->spptr;

  PetscFunctionBegin;
  /* check if matrix is mumps type */
  if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0);

  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO) {
      /* host values of the MUMPS control parameters */
      ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",mumps->id.sym);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",mumps->id.par);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d \n",mumps->id.ICNTL(1));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d \n",mumps->id.ICNTL(2));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d \n",mumps->id.ICNTL(3));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",mumps->id.ICNTL(4));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",mumps->id.ICNTL(5));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",mumps->id.ICNTL(6));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequentia matrix ordering):%d \n",mumps->id.ICNTL(7));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(8) (scalling strategy): %d \n",mumps->id.ICNTL(8));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",mumps->id.ICNTL(10));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",mumps->id.ICNTL(11));CHKERRQ(ierr);
      /* error-analysis statistics are only valid when ICNTL(11) was enabled for the solve */
      if (mumps->id.ICNTL(11)>0) {
        ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr);
      }
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",mumps->id.ICNTL(12));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (efficiency control): %d \n",mumps->id.ICNTL(13));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",mumps->id.ICNTL(14));CHKERRQ(ierr);
      /* ICNTL(15-17) not used */
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",mumps->id.ICNTL(18));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(19) (Shur complement info): %d \n",mumps->id.ICNTL(19));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(20) (rhs sparse pattern): %d \n",mumps->id.ICNTL(20));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(21) (somumpstion struct): %d \n",mumps->id.ICNTL(21));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d \n",mumps->id.ICNTL(22));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory can be allocated locally):%d \n",mumps->id.ICNTL(23));CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d \n",mumps->id.ICNTL(24));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d \n",mumps->id.ICNTL(25));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for rhs or solution): %d \n",mumps->id.ICNTL(26));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(27) (experimental parameter): %d \n",mumps->id.ICNTL(27));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d \n",mumps->id.ICNTL(28));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d \n",mumps->id.ICNTL(29));CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d \n",mumps->id.ICNTL(30));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(31) (factors is discarded in the solve phase): %d \n",mumps->id.ICNTL(31));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d \n",mumps->id.ICNTL(33));CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absomumpste pivoting threshold): %g \n",mumps->id.CNTL(3));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," CNTL(4) (vamumpse of static pivoting): %g \n",mumps->id.CNTL(4));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5));CHKERRQ(ierr);

      /* infomation local to each processor */
      ierr = PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] %g \n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);
      ierr = PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);
      ierr = PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);

      ierr = PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] %d \n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);

      ierr = PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] %d \n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);

      ierr = PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] %d \n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);

      if (!mumps->myid) { /* information from the host */
        ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr);

        ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(3));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(4));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",mumps->id.INFOG(5));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",mumps->id.INFOG(6));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively use after analysis): %d \n",mumps->id.INFOG(7));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",mumps->id.INFOG(8));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",mumps->id.INFOG(9));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d \n",mumps->id.INFOG(10));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d \n",mumps->id.INFOG(11));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",mumps->id.INFOG(12));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",mumps->id.INFOG(13));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",mumps->id.INFOG(14));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",mumps->id.INFOG(15));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",mumps->id.INFOG(16));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",mumps->id.INFOG(17));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",mumps->id.INFOG(18));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",mumps->id.INFOG(19));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",mumps->id.INFOG(20));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",mumps->id.INFOG(21));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",mumps->id.INFOG(22));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",mumps->id.INFOG(23));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",mumps->id.INFOG(24));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",mumps->id.INFOG(25));CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MUMPS"
/*
   MatGetInfo_MUMPS - reports factor statistics; only the nonzero counts come from
   MUMPS (INFOG(20), estimated entries in the factors), everything else is zeroed.
   The flag argument is ignored.
*/
PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)A->spptr;

  PetscFunctionBegin;
  info->block_size        = 1.0;
  info->nz_allocated      = mumps->id.INFOG(20);
  info->nz_used           = mumps->id.INFOG(20);
  info->nz_unneeded       = 0.0;
  info->assemblies        = 0.0;
  info->mallocs           = 0.0;
  info->memory            = 0.0;
  info->fill_ratio_given  = 0;
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMumpsSetIcntl_MUMPS"
/* Implementation behind MatMumpsSetIcntl(); composed on factor matrices as "MatMumpsSetIcntl_C".
   Note: icntl is not range-checked here. */
PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->spptr;

  PetscFunctionBegin;
  mumps->id.ICNTL(icntl) = ival;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMumpsSetIcntl"
/*@
  MatMumpsSetIcntl - Set MUMPS parameter ICNTL()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
.  icntl - index of MUMPS parameter array ICNTL()
-  ival - value of MUMPS ICNTL(icntl)

  Options Database:
.
-mat_mumps_icntl_<icntl> <ival>

   Level: beginner

   References: MUMPS Users' Guide

.seealso: MatGetFactor()
@*/
PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidLogicalCollectiveInt(F,icntl,2);
  PetscValidLogicalCollectiveInt(F,ival,3);
  /* dispatches to MatMumpsSetIcntl_MUMPS when F is a MUMPS factor; no-op otherwise */
  ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMumpsSetCntl_MUMPS"
/* Implementation behind MatMumpsSetCntl(); composed on factor matrices as "MatMumpsSetCntl_C".
   Note: icntl is not range-checked here. */
PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->spptr;

  PetscFunctionBegin;
  mumps->id.CNTL(icntl) = val;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMumpsSetCntl"
/*@
  MatMumpsSetCntl - Set MUMPS parameter CNTL()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
.  icntl - index of MUMPS parameter array CNTL()
-  val - value of MUMPS CNTL(icntl)

  Options Database:
.
-mat_mumps_cntl_<icntl> <val> 1359 1360 Level: beginner 1361 1362 References: MUMPS Users' Guide 1363 1364 .seealso: MatGetFactor() 1365 @*/ 1366 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val) 1367 { 1368 PetscErrorCode ierr; 1369 1370 PetscFunctionBegin; 1371 PetscValidLogicalCollectiveInt(F,icntl,2); 1372 PetscValidLogicalCollectiveInt(F,val,3); 1373 ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr); 1374 PetscFunctionReturn(0); 1375 } 1376 1377 /*MC 1378 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 1379 distributed and sequential matrices via the external package MUMPS. 1380 1381 Works with MATAIJ and MATSBAIJ matrices 1382 1383 Options Database Keys: 1384 + -mat_mumps_icntl_4 <0,...,4> - print level 1385 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide) 1386 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guidec) 1387 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T 1388 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements 1389 . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view 1390 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide) 1391 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide) 1392 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide) 1393 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide) 1394 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold 1395 . 
-mat_mumps_cntl_2 <tol>  - stopping criterion for refinement
- -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold

   Level: beginner

.seealso: PCFactorSetMatSolverPackage(), MatSolverPackage

M*/

#undef __FUNCT__
#define __FUNCT__ "MatFactorGetSolverPackage_mumps"
/* Returns the solver-package name for factors created by this interface */
static PetscErrorCode MatFactorGetSolverPackage_mumps(Mat A,const MatSolverPackage *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERMUMPS;
  PetscFunctionReturn(0);
}

/* MatGetFactor for Seq and MPI AIJ matrices */
#undef __FUNCT__
#define __FUNCT__ "MatGetFactor_aij_mumps"
/*
   Creates the MUMPS factor matrix B for an AIJ matrix A: picks the triple-conversion
   routine for LU (full pattern) or Cholesky (upper triangle as sbaij triples), sets
   mumps->sym (0 = unsymmetric, 1 = SPD, 2 = general symmetric), and initializes MUMPS.
*/
PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqAIJ;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
  if (isSeqAIJ) {
    ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
  } else {
    ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
  }

  ierr = PetscNewLog(B,Mat_MUMPS,&mumps);CHKERRQ(ierr);

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverPackage_C",MatFactorGetSolverPackage_mumps);CHKERRQ(ierr);
  /* the _MUMPS implementations are composed; the public wrappers dispatch to these names */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
    mumps->sym = 0;
  } else {
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
    B->factortype                  = MAT_FACTOR_CHOLESKY;
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
    if (A->spd_set && A->spd) mumps->sym = 1;
    else                      mumps->sym = 2;
  }

  mumps->isAIJ    = PETSC_TRUE;
  /* chain the original destroy so the underlying AIJ data is still released */
  mumps->Destroy  = B->ops->destroy;
  B->ops->destroy = MatDestroy_MUMPS;
  B->spptr        = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}

/* MatGetFactor for Seq and MPI SBAIJ matrices */
#undef __FUNCT__
#define __FUNCT__ "MatGetFactor_sbaij_mumps"
/* Creates the MUMPS (Cholesky-only) factor matrix for an SBAIJ matrix with bs=1 */
PETSC_EXTERN PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqSBAIJ;

  PetscFunctionBegin;
  if (ftype != MAT_FACTOR_CHOLESKY) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Cannot use PETSc SBAIJ matrices with MUMPS LU, use AIJ matrix");
  if (A->rmap->bs > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Cannot use PETSc SBAIJ matrices with block size > 1 with MUMPS Cholesky, use AIJ matrix instead");
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr);
  /* Create the factorization matrix */
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = PetscNewLog(B,Mat_MUMPS,&mumps);CHKERRQ(ierr);
  if (isSeqSBAIJ) {
    ierr =
MatSeqSBAIJSetPreallocation(B,1,0,NULL);CHKERRQ(ierr); 1490 1491 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 1492 } else { 1493 ierr = MatMPISBAIJSetPreallocation(B,1,0,NULL,0,NULL);CHKERRQ(ierr); 1494 1495 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 1496 } 1497 1498 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 1499 B->ops->view = MatView_MUMPS; 1500 1501 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverPackage_C",MatFactorGetSolverPackage_mumps);CHKERRQ(ierr); 1502 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl);CHKERRQ(ierr); 1503 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl);CHKERRQ(ierr); 1504 1505 B->factortype = MAT_FACTOR_CHOLESKY; 1506 if (A->spd_set && A->spd) mumps->sym = 1; 1507 else mumps->sym = 2; 1508 1509 mumps->isAIJ = PETSC_FALSE; 1510 mumps->Destroy = B->ops->destroy; 1511 B->ops->destroy = MatDestroy_MUMPS; 1512 B->spptr = (void*)mumps; 1513 1514 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 1515 1516 *F = B; 1517 PetscFunctionReturn(0); 1518 } 1519 1520 #undef __FUNCT__ 1521 #define __FUNCT__ "MatGetFactor_baij_mumps" 1522 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F) 1523 { 1524 Mat B; 1525 PetscErrorCode ierr; 1526 Mat_MUMPS *mumps; 1527 PetscBool isSeqBAIJ; 1528 1529 PetscFunctionBegin; 1530 /* Create the factorization matrix */ 1531 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr); 1532 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1533 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1534 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1535 if (isSeqBAIJ) { 1536 ierr = MatSeqBAIJSetPreallocation(B,A->rmap->bs,0,NULL);CHKERRQ(ierr); 1537 } else { 1538 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 1539 } 
1540 1541 ierr = PetscNewLog(B,Mat_MUMPS,&mumps);CHKERRQ(ierr); 1542 if (ftype == MAT_FACTOR_LU) { 1543 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 1544 B->factortype = MAT_FACTOR_LU; 1545 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 1546 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 1547 mumps->sym = 0; 1548 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead\n"); 1549 1550 B->ops->view = MatView_MUMPS; 1551 1552 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverPackage_C",MatFactorGetSolverPackage_mumps);CHKERRQ(ierr); 1553 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 1554 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 1555 1556 mumps->isAIJ = PETSC_TRUE; 1557 mumps->Destroy = B->ops->destroy; 1558 B->ops->destroy = MatDestroy_MUMPS; 1559 B->spptr = (void*)mumps; 1560 1561 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 1562 1563 *F = B; 1564 PetscFunctionReturn(0); 1565 } 1566 1567