
/*
  Provides an interface to the MUMPS sparse solver
*/

#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sell/mpi/mpisell.h>

/* pick the MUMPS header matching the PETSc scalar type (real/complex x single/double) */
EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#include <cmumps_c.h>
#else
#include <zmumps_c.h>
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#include <smumps_c.h>
#else
#include <dmumps_c.h>
#endif
#endif
EXTERN_C_END

/* values for the MUMPS JOB parameter (see the MUMPS users' guide) */
#define JOB_INIT -1
#define JOB_FACTSYMBOLIC 1
#define JOB_FACTNUMERIC 2
#define JOB_SOLVE 3
#define JOB_END -2

/* calls to MUMPS */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c cmumps_c
#else
#define MUMPS_c zmumps_c
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c smumps_c
#else
#define MUMPS_c dmumps_c
#endif
#endif

/* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
#define PetscMUMPS_c(mumps) \
  do { \
    if (mumps->use_petsc_omp_support) { \
      if (mumps->is_omp_master) { \
        ierr = PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl);CHKERRQ(ierr); \
        MUMPS_c(&mumps->id); \
        ierr = PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl);CHKERRQ(ierr); \
      } \
      /* all ranks (master or not) meet here; the barrier also releases blocked CPUs */ \
      ierr = PetscOmpCtrlBarrier(mumps->omp_ctrl);CHKERRQ(ierr); \
    } else { \
      MUMPS_c(&mumps->id); \
    } \
  } while(0)
#else
#define PetscMUMPS_c(mumps) \
  do { MUMPS_c(&mumps->id); } while (0)
#endif

/* declare MumpsScalar */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MumpsScalar mumps_complex
#else
#define MumpsScalar mumps_double_complex
#endif
#else
#define MumpsScalar PetscScalar
#endif

/* macros s.t. 
indices match MUMPS documentation */
#define ICNTL(I)  icntl[(I)-1]
#define CNTL(I)   cntl[(I)-1]
#define INFOG(I)  infog[(I)-1]
#define INFO(I)   info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I)  rinfo[(I)-1]

/* Solver context stored in the factor matrix (Mat->data) */
typedef struct {
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;
#else
  ZMUMPS_STRUC_C id;
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
#else
  DMUMPS_STRUC_C id;
#endif
#endif

  MatStructure matstruc;
  PetscMPIInt  myid,petsc_size;
  PetscInt     *irn,*jcn,nz,sym;     /* matrix triples produced by (*ConvertToTriples)(); jcn/val may share irn's allocation */
  PetscScalar  *val;
  MPI_Comm     mumps_comm;
  PetscInt     ICNTL9_pre;           /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter   scat_rhs, scat_sol;   /* used by MatSolve() */
  Vec          b_seq,x_seq;
  PetscInt     ninfo,*info;          /* display INFO */
  PetscInt     sizeredrhs;           /* allocated size of id.redrhs (reduced rhs for Schur condensation) */
  PetscScalar  *schur_sol;           /* scratch solution used when the Schur complement has been inverted */
  PetscInt     schur_sizesol;
 
  PetscBool    use_petsc_omp_support;
  PetscOmpCtrl omp_ctrl;             /* an OpenMP controller through which blocked processes release their CPU (MPI_Barrier does not have this guarantee) */
  MPI_Comm     petsc_comm,omp_comm;  /* petsc_comm is petsc matrix's comm */
  PetscMPIInt  mpinz;                /* on master rank, nz = sum(mpinz) over omp_comm; on other ranks, mpinz = nz */
  PetscMPIInt  omp_comm_size;
  PetscMPIInt  is_omp_master;        /* is this rank the master of omp_comm */
  PetscMPIInt  *recvcount,*displs;

  PetscErrorCode (*ConvertToTriples)(Mat, int, MatReuse, int*, int**, int**, PetscScalar**);
} Mat_MUMPS;

extern PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*);

/* Free all Schur-complement storage (index list, reduced rhs, scratch solution) and
   reset the MUMPS Schur controls so no Schur complement is requested any more */
static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr);
  ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
  ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
  mumps->id.size_schur = 0;
  mumps->id.schur_lld  = 0;
  mumps->id.ICNTL(19)  = 0;   /* ICNTL(19)=0: do not return a Schur complement */
  PetscFunctionReturn(0);
}

/* solve with rhs in mumps->id.redrhs and return in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS            *mumps=(Mat_MUMPS*)F->data;
  Mat                  S,B,X;
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;
  PetscErrorCode       ierr;

  PetscFunctionBegin;
  ierr = MatFactorFactorizeSchurComplement(F);CHKERRQ(ierr);
  ierr = MatFactorGetSchurComplement(F,&S,&schurstatus);CHKERRQ(ierr);
  /* B wraps id.redrhs (size_schur x nrhs, column major) without copying it */
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  ierr = MatPinToCPU(B,S->pinnedtocpu);CHKERRQ(ierr);
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    /* X also wraps id.redrhs, so the solve overwrites the rhs in place */
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);CHKERRQ(ierr);
    ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    ierr = MatPinToCPU(X,S->pinnedtocpu);CHKERRQ(ierr);
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatMatSolveTranspose(S,B,X);CHKERRQ(ierr);
    } else {
      ierr = MatMatSolve(S,B,X);CHKERRQ(ierr);
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    /* S holds inv(Schur): the "solve" is a multiply into the scratch buffer schur_sol,
       grown lazily and copied back into id.redrhs afterwards */
    sizesol = mumps->id.nrhs*mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
      ierr = PetscMalloc1(sizesol,&mumps->schur_sol);CHKERRQ(ierr);
      mumps->schur_sizesol = sizesol;
    }
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);CHKERRQ(ierr);
    ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    ierr = MatPinToCPU(X,S->pinnedtocpu);CHKERRQ(ierr);
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatTransposeMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr);
    } else {
      ierr = MatMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr);
    }
    ierr = MatCopy(X,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %D",F->schur_status);
    break;
  }
  ierr = MatFactorRestoreSchurComplement(F,&S,schurstatus);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = MatDestroy(&X);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Drive the two halves of a Schur-reduced solve:
   expansion == PETSC_FALSE: before the MUMPS solve, allocate id.redrhs and set ICNTL(26)=1 (condensation)
   expansion == PETSC_TRUE : after it, solve the Schur system ourselves, run the MUMPS expansion
                             (ICNTL(26)=2), then restore ICNTL(26)=-1 */
static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS     *mumps=(Mat_MUMPS*)F->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
    PetscFunctionReturn(0);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = mumps->id.size_schur;
      ierr = PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);CHKERRQ(ierr);
      mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs;
    }
    mumps->id.ICNTL(26) = 1; /* condensation phase */
  } else { /* prepare for the expansion step */
    /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
    ierr = MatMumpsSolveSchur_Private(F);CHKERRQ(ierr);
    mumps->id.ICNTL(26) = 2; /* expansion phase */
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
    if (mumps->id.nrhs > 1) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = 0;
      mumps->sizeredrhs = 0;
    }
  }
  PetscFunctionReturn(0);
}

/*
  MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz]

  input:
    A       - matrix in aij,baij or sbaij format
    shift   - 0: C style output triple; 1: Fortran style output triple.
    reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
              MAT_REUSE_MATRIX: only the values in v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and col index, matrix values (matrix triples)

  The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
  freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means
  that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 
251 252 */ 253 254 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 255 { 256 const PetscScalar *av; 257 const PetscInt *ai,*aj,*ajj,M=A->rmap->n; 258 PetscInt nz,rnz,i,j; 259 PetscErrorCode ierr; 260 PetscInt *row,*col; 261 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 262 263 PetscFunctionBegin; 264 ierr = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr); 265 *v = (PetscScalar*)av; 266 if (reuse == MAT_INITIAL_MATRIX) { 267 nz = aa->nz; 268 ai = aa->i; 269 aj = aa->j; 270 *nnz = nz; 271 ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr); 272 col = row + nz; 273 274 nz = 0; 275 for (i=0; i<M; i++) { 276 rnz = ai[i+1] - ai[i]; 277 ajj = aj + ai[i]; 278 for (j=0; j<rnz; j++) { 279 row[nz] = i+shift; col[nz++] = ajj[j] + shift; 280 } 281 } 282 *r = row; *c = col; 283 } 284 ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr); 285 PetscFunctionReturn(0); 286 } 287 288 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 289 { 290 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 291 PetscInt *ptr; 292 293 PetscFunctionBegin; 294 *v = a->val; 295 if (reuse == MAT_INITIAL_MATRIX) { 296 PetscInt nz,i,j,row; 297 PetscErrorCode ierr; 298 299 nz = a->sliidx[a->totalslices]; 300 *nnz = nz; 301 ierr = PetscMalloc1(2*nz, &ptr);CHKERRQ(ierr); 302 *r = ptr; 303 *c = ptr + nz; 304 305 for (i=0; i<a->totalslices; i++) { 306 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 307 *ptr++ = 8*i + row + shift; 308 } 309 } 310 for (i=0;i<nz;i++) *ptr++ = a->colidx[i] + shift; 311 } 312 PetscFunctionReturn(0); 313 } 314 315 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 316 { 317 Mat_SeqBAIJ *aa=(Mat_SeqBAIJ*)A->data; 318 const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2; 319 PetscInt bs,M,nz,idx=0,rnz,i,j,k,m; 320 PetscErrorCode ierr; 321 PetscInt *row,*col; 322 323 
PetscFunctionBegin; 324 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 325 M = A->rmap->N/bs; 326 *v = aa->a; 327 if (reuse == MAT_INITIAL_MATRIX) { 328 ai = aa->i; aj = aa->j; 329 nz = bs2*aa->nz; 330 *nnz = nz; 331 ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr); 332 col = row + nz; 333 334 for (i=0; i<M; i++) { 335 ajj = aj + ai[i]; 336 rnz = ai[i+1] - ai[i]; 337 for (k=0; k<rnz; k++) { 338 for (j=0; j<bs; j++) { 339 for (m=0; m<bs; m++) { 340 row[idx] = i*bs + m + shift; 341 col[idx++] = bs*(ajj[k]) + j + shift; 342 } 343 } 344 } 345 } 346 *r = row; *c = col; 347 } 348 PetscFunctionReturn(0); 349 } 350 351 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r,int **c,PetscScalar **v) 352 { 353 const PetscInt *ai, *aj,*ajj; 354 PetscInt nz,rnz,i,j,k,m,bs; 355 PetscErrorCode ierr; 356 PetscInt *row,*col; 357 PetscScalar *val; 358 Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)A->data; 359 const PetscInt bs2=aa->bs2,mbs=aa->mbs; 360 #if defined(PETSC_USE_COMPLEX) 361 PetscBool hermitian; 362 #endif 363 364 PetscFunctionBegin; 365 #if defined(PETSC_USE_COMPLEX) 366 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 367 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 368 #endif 369 ai = aa->i; 370 aj = aa->j; 371 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 372 if (reuse == MAT_INITIAL_MATRIX) { 373 nz = aa->nz; 374 ierr = PetscMalloc((2*bs2*nz*sizeof(PetscInt)+(bs>1?bs2*nz*sizeof(PetscScalar):0)), &row);CHKERRQ(ierr); 375 col = row + bs2*nz; 376 if (bs>1) val = (PetscScalar*)(col + bs2*nz); 377 else val = aa->a; 378 379 *r = row; *c = col; *v = val; 380 } else { 381 if (bs == 1) *v = aa->a; 382 row = *r; col = *c; val = *v; 383 } 384 385 nz = 0; 386 if (bs>1) { 387 for (i=0; i<mbs; i++) { 388 rnz = ai[i+1] - ai[i]; 389 ajj = aj + ai[i]; 390 for (j=0; j<rnz; j++) { 391 for (k=0; k<bs; k++) { 392 for (m=0; m<bs; m++) { 393 if (ajj[j]>i || 
k>=m) { 394 if (reuse == MAT_INITIAL_MATRIX) { 395 row[nz] = i*bs + m + shift; 396 col[nz] = ajj[j]*bs + k + shift; 397 } 398 val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs]; 399 } 400 } 401 } 402 } 403 } 404 } else if (reuse == MAT_INITIAL_MATRIX) { 405 for (i=0; i<mbs; i++) { 406 rnz = ai[i+1] - ai[i]; 407 ajj = aj + ai[i]; 408 for (j=0; j<rnz; j++) { 409 row[nz] = i+shift; col[nz++] = ajj[j] + shift; 410 } 411 } 412 if (nz != aa->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %D != %D",nz,aa->nz); 413 } 414 if (reuse == MAT_INITIAL_MATRIX) *nnz = nz; 415 PetscFunctionReturn(0); 416 } 417 418 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 419 { 420 const PetscInt *ai,*aj,*ajj,*adiag,M=A->rmap->n; 421 PetscInt nz,rnz,i,j; 422 const PetscScalar *av,*v1; 423 PetscScalar *val; 424 PetscErrorCode ierr; 425 PetscInt *row,*col; 426 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 427 PetscBool missing; 428 #if defined(PETSC_USE_COMPLEX) 429 PetscBool hermitian; 430 #endif 431 432 PetscFunctionBegin; 433 #if defined(PETSC_USE_COMPLEX) 434 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 435 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 436 #endif 437 ierr = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr); 438 ai = aa->i; aj = aa->j; 439 adiag = aa->diag; 440 ierr = MatMissingDiagonal_SeqAIJ(A,&missing,&i);CHKERRQ(ierr); 441 if (reuse == MAT_INITIAL_MATRIX) { 442 /* count nz in the upper triangular part of A */ 443 nz = 0; 444 if (missing) { 445 for (i=0; i<M; i++) { 446 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 447 for (j=ai[i];j<ai[i+1];j++) { 448 if (aj[j] < i) continue; 449 nz++; 450 } 451 } else { 452 nz += ai[i+1] - adiag[i]; 453 } 454 } 455 } else { 456 for (i=0; i<M; i++) nz += ai[i+1] - adiag[i]; 457 } 458 *nnz = nz; 459 460 ierr = 
PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 461 col = row + nz; 462 val = (PetscScalar*)(col + nz); 463 464 nz = 0; 465 if (missing) { 466 for (i=0; i<M; i++) { 467 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 468 for (j=ai[i];j<ai[i+1];j++) { 469 if (aj[j] < i) continue; 470 row[nz] = i+shift; 471 col[nz] = aj[j]+shift; 472 val[nz] = av[j]; 473 nz++; 474 } 475 } else { 476 rnz = ai[i+1] - adiag[i]; 477 ajj = aj + adiag[i]; 478 v1 = av + adiag[i]; 479 for (j=0; j<rnz; j++) { 480 row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j]; 481 } 482 } 483 } 484 } else { 485 for (i=0; i<M; i++) { 486 rnz = ai[i+1] - adiag[i]; 487 ajj = aj + adiag[i]; 488 v1 = av + adiag[i]; 489 for (j=0; j<rnz; j++) { 490 row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j]; 491 } 492 } 493 } 494 *r = row; *c = col; *v = val; 495 } else { 496 nz = 0; val = *v; 497 if (missing) { 498 for (i=0; i <M; i++) { 499 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 500 for (j=ai[i];j<ai[i+1];j++) { 501 if (aj[j] < i) continue; 502 val[nz++] = av[j]; 503 } 504 } else { 505 rnz = ai[i+1] - adiag[i]; 506 v1 = av + adiag[i]; 507 for (j=0; j<rnz; j++) { 508 val[nz++] = v1[j]; 509 } 510 } 511 } 512 } else { 513 for (i=0; i <M; i++) { 514 rnz = ai[i+1] - adiag[i]; 515 v1 = av + adiag[i]; 516 for (j=0; j<rnz; j++) { 517 val[nz++] = v1[j]; 518 } 519 } 520 } 521 } 522 ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr); 523 PetscFunctionReturn(0); 524 } 525 526 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r,int **c, PetscScalar **v) 527 { 528 const PetscInt *ai,*aj,*bi,*bj,*garray,*ajj,*bjj; 529 PetscErrorCode ierr; 530 PetscInt rstart,nz,bs,i,j,k,m,jj,irow,countA,countB; 531 PetscInt *row,*col; 532 const PetscScalar *av,*bv,*v1,*v2; 533 PetscScalar *val; 534 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data; 535 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)(mat->A)->data; 536 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 
537 const PetscInt bs2=aa->bs2,mbs=aa->mbs; 538 #if defined(PETSC_USE_COMPLEX) 539 PetscBool hermitian; 540 #endif 541 542 PetscFunctionBegin; 543 #if defined(PETSC_USE_COMPLEX) 544 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 545 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 546 #endif 547 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 548 rstart = A->rmap->rstart; 549 ai = aa->i; 550 aj = aa->j; 551 bi = bb->i; 552 bj = bb->j; 553 av = aa->a; 554 bv = bb->a; 555 556 garray = mat->garray; 557 558 if (reuse == MAT_INITIAL_MATRIX) { 559 nz = aa->nz + bb->nz; 560 ierr = PetscMalloc((2*bs2*nz*sizeof(PetscInt)+bs2*nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 561 col = row + bs2*nz; 562 val = (PetscScalar*)(col + bs2*nz); 563 564 *r = row; *c = col; *v = val; 565 } else { 566 row = *r; col = *c; val = *v; 567 } 568 569 jj = 0; irow = rstart; 570 for (i=0; i<mbs; i++) { 571 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 572 countA = ai[i+1] - ai[i]; 573 countB = bi[i+1] - bi[i]; 574 bjj = bj + bi[i]; 575 v1 = av + ai[i]*bs2; 576 v2 = bv + bi[i]*bs2; 577 578 if (bs>1) { 579 /* A-part */ 580 for (j=0; j<countA; j++) { 581 for (k=0; k<bs; k++) { 582 for (m=0; m<bs; m++) { 583 if (rstart + ajj[j]*bs>irow || k>=m) { 584 if (reuse == MAT_INITIAL_MATRIX) { 585 row[jj] = irow + m + shift; col[jj] = rstart + ajj[j]*bs + k + shift; 586 } 587 val[jj++] = v1[j*bs2 + m + k*bs]; 588 } 589 } 590 } 591 } 592 593 /* B-part */ 594 for (j=0; j < countB; j++) { 595 for (k=0; k<bs; k++) { 596 for (m=0; m<bs; m++) { 597 if (reuse == MAT_INITIAL_MATRIX) { 598 row[jj] = irow + m + shift; col[jj] = garray[bjj[j]]*bs + k + shift; 599 } 600 val[jj++] = v2[j*bs2 + m + k*bs]; 601 } 602 } 603 } 604 } else { 605 /* A-part */ 606 for (j=0; j<countA; j++) { 607 if (reuse == MAT_INITIAL_MATRIX) { 608 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 609 } 610 val[jj++] = v1[j]; 
611 } 612 613 /* B-part */ 614 for (j=0; j < countB; j++) { 615 if (reuse == MAT_INITIAL_MATRIX) { 616 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 617 } 618 val[jj++] = v2[j]; 619 } 620 } 621 irow+=bs; 622 } 623 *nnz = jj; 624 PetscFunctionReturn(0); 625 } 626 627 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 628 { 629 const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 630 PetscErrorCode ierr; 631 PetscInt rstart,nz,i,j,jj,irow,countA,countB; 632 PetscInt *row,*col; 633 const PetscScalar *av, *bv,*v1,*v2; 634 PetscScalar *val; 635 Mat Ad,Ao; 636 Mat_SeqAIJ *aa; 637 Mat_SeqAIJ *bb; 638 639 PetscFunctionBegin; 640 ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr); 641 ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr); 642 ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr); 643 644 aa = (Mat_SeqAIJ*)(Ad)->data; 645 bb = (Mat_SeqAIJ*)(Ao)->data; 646 ai = aa->i; 647 aj = aa->j; 648 bi = bb->i; 649 bj = bb->j; 650 651 rstart = A->rmap->rstart; 652 653 if (reuse == MAT_INITIAL_MATRIX) { 654 nz = aa->nz + bb->nz; 655 *nnz = nz; 656 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 657 col = row + nz; 658 val = (PetscScalar*)(col + nz); 659 660 *r = row; *c = col; *v = val; 661 } else { 662 row = *r; col = *c; val = *v; 663 } 664 665 jj = 0; irow = rstart; 666 for (i=0; i<m; i++) { 667 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 668 countA = ai[i+1] - ai[i]; 669 countB = bi[i+1] - bi[i]; 670 bjj = bj + bi[i]; 671 v1 = av + ai[i]; 672 v2 = bv + bi[i]; 673 674 /* A-part */ 675 for (j=0; j<countA; j++) { 676 if (reuse == MAT_INITIAL_MATRIX) { 677 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 678 } 679 val[jj++] = v1[j]; 680 } 681 682 /* B-part */ 683 for (j=0; j < countB; j++) { 684 if (reuse == MAT_INITIAL_MATRIX) { 685 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 686 } 687 val[jj++] = 
v2[j]; 688 } 689 irow++; 690 } 691 ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr); 692 ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr); 693 PetscFunctionReturn(0); 694 } 695 696 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 697 { 698 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)A->data; 699 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ*)(mat->A)->data; 700 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 701 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj; 702 const PetscInt *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart; 703 const PetscInt bs2=mat->bs2; 704 PetscErrorCode ierr; 705 PetscInt bs,nz,i,j,k,n,jj,irow,countA,countB,idx; 706 PetscInt *row,*col; 707 const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2; 708 PetscScalar *val; 709 710 PetscFunctionBegin; 711 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 712 if (reuse == MAT_INITIAL_MATRIX) { 713 nz = bs2*(aa->nz + bb->nz); 714 *nnz = nz; 715 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 716 col = row + nz; 717 val = (PetscScalar*)(col + nz); 718 719 *r = row; *c = col; *v = val; 720 } else { 721 row = *r; col = *c; val = *v; 722 } 723 724 jj = 0; irow = rstart; 725 for (i=0; i<mbs; i++) { 726 countA = ai[i+1] - ai[i]; 727 countB = bi[i+1] - bi[i]; 728 ajj = aj + ai[i]; 729 bjj = bj + bi[i]; 730 v1 = av + bs2*ai[i]; 731 v2 = bv + bs2*bi[i]; 732 733 idx = 0; 734 /* A-part */ 735 for (k=0; k<countA; k++) { 736 for (j=0; j<bs; j++) { 737 for (n=0; n<bs; n++) { 738 if (reuse == MAT_INITIAL_MATRIX) { 739 row[jj] = irow + n + shift; 740 col[jj] = rstart + bs*ajj[k] + j + shift; 741 } 742 val[jj++] = v1[idx++]; 743 } 744 } 745 } 746 747 idx = 0; 748 /* B-part */ 749 for (k=0; k<countB; k++) { 750 for (j=0; j<bs; j++) { 751 for (n=0; n<bs; n++) { 752 if (reuse == MAT_INITIAL_MATRIX) { 753 row[jj] = irow + n + shift; 754 col[jj] = bs*garray[bjj[k]] + j + shift; 755 } 756 
val[jj++] = v2[idx++]; 757 } 758 } 759 } 760 irow += bs; 761 } 762 PetscFunctionReturn(0); 763 } 764 765 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 766 { 767 const PetscInt *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 768 PetscErrorCode ierr; 769 PetscInt rstart,nz,nza,nzb,i,j,jj,irow,countA,countB; 770 PetscInt *row,*col; 771 const PetscScalar *av, *bv,*v1,*v2; 772 PetscScalar *val; 773 Mat Ad,Ao; 774 Mat_SeqAIJ *aa; 775 Mat_SeqAIJ *bb; 776 #if defined(PETSC_USE_COMPLEX) 777 PetscBool hermitian; 778 #endif 779 780 PetscFunctionBegin; 781 #if defined(PETSC_USE_COMPLEX) 782 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 783 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 784 #endif 785 ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr); 786 ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr); 787 ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr); 788 789 aa = (Mat_SeqAIJ*)(Ad)->data; 790 bb = (Mat_SeqAIJ*)(Ao)->data; 791 ai = aa->i; 792 aj = aa->j; 793 adiag = aa->diag; 794 bi = bb->i; 795 bj = bb->j; 796 797 rstart = A->rmap->rstart; 798 799 if (reuse == MAT_INITIAL_MATRIX) { 800 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 801 nzb = 0; /* num of upper triangular entries in mat->B */ 802 for (i=0; i<m; i++) { 803 nza += (ai[i+1] - adiag[i]); 804 countB = bi[i+1] - bi[i]; 805 bjj = bj + bi[i]; 806 for (j=0; j<countB; j++) { 807 if (garray[bjj[j]] > rstart) nzb++; 808 } 809 } 810 811 nz = nza + nzb; /* total nz of upper triangular part of mat */ 812 *nnz = nz; 813 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 814 col = row + nz; 815 val = (PetscScalar*)(col + nz); 816 817 *r = row; *c = col; *v = val; 818 } else { 819 row = *r; col = *c; val = *v; 820 } 821 822 jj = 0; irow = rstart; 823 
for (i=0; i<m; i++) { 824 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 825 v1 = av + adiag[i]; 826 countA = ai[i+1] - adiag[i]; 827 countB = bi[i+1] - bi[i]; 828 bjj = bj + bi[i]; 829 v2 = bv + bi[i]; 830 831 /* A-part */ 832 for (j=0; j<countA; j++) { 833 if (reuse == MAT_INITIAL_MATRIX) { 834 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 835 } 836 val[jj++] = v1[j]; 837 } 838 839 /* B-part */ 840 for (j=0; j < countB; j++) { 841 if (garray[bjj[j]] > rstart) { 842 if (reuse == MAT_INITIAL_MATRIX) { 843 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 844 } 845 val[jj++] = v2[j]; 846 } 847 } 848 irow++; 849 } 850 ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr); 851 ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr); 852 PetscFunctionReturn(0); 853 } 854 855 PetscErrorCode MatDestroy_MUMPS(Mat A) 856 { 857 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 858 PetscErrorCode ierr; 859 860 PetscFunctionBegin; 861 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 862 ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr); 863 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 864 ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr); 865 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 866 ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr); 867 ierr = PetscFree(mumps->irn);CHKERRQ(ierr); 868 ierr = PetscFree(mumps->info);CHKERRQ(ierr); 869 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 870 mumps->id.job = JOB_END; 871 PetscMUMPS_c(mumps); 872 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 873 if (mumps->use_petsc_omp_support) { ierr = PetscOmpCtrlDestroy(&mumps->omp_ctrl);CHKERRQ(ierr); } 874 #endif 875 ierr = PetscFree2(mumps->recvcount,mumps->displs);CHKERRQ(ierr); 876 ierr = PetscFree(A->data);CHKERRQ(ierr); 877 878 /* clear composed functions */ 879 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr); 880 ierr = 
PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Solve A x = b with the MUMPS factor (single dense rhs). In parallel the rhs is
   centralized on rank 0 (MUMPS requirement) and the distributed solution is scattered
   back; an optional Schur condensation/expansion pair brackets the MUMPS solve. */
PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS        *mumps=(Mat_MUMPS*)A->data;
  PetscScalar      *array;
  Vec              b_seq;
  IS               is_iden,is_petsc;
  PetscErrorCode   ierr;
  PetscInt         i;
  PetscBool        second_solve = PETSC_FALSE;
  static PetscBool cite1 = PETSC_FALSE,cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1);CHKERRQ(ierr);
  ierr = PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",&cite2);CHKERRQ(ierr);

  if (A->factorerrortype) {
    /* factorization previously failed: return an inf solution instead of calling MUMPS */
    ierr = PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    ierr = VecSetInf(x);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  mumps->id.ICNTL(20) = 0; /* dense RHS */
  mumps->id.nrhs      = 1;
  b_seq               = mumps->b_seq;
  if (mumps->petsc_size > 1) {
    /* MUMPS only supports centralized rhs. Scatter b into a sequential rhs vector */
    ierr = VecScatterBegin(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mumps->myid) {ierr = VecGetArray(b_seq,&array);CHKERRQ(ierr);}
  } else {  /* petsc_size == 1 */
    /* uniprocessor: solve in place in x (b copied there first) */
    ierr = VecCopy(b,x);CHKERRQ(ierr);
    ierr = VecGetArray(x,&array);CHKERRQ(ierr);
  }
  if (!mumps->myid) { /* define rhs on the host */
    mumps->id.nrhs = 1;
    mumps->id.rhs  = (MumpsScalar*)array;
  }

  /*
     handle condensation step of Schur complement (if any)
     We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
     According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase
     Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
     This requires an extra call to PetscMUMPS_c and the computation of the factors for S
  */
  if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
    if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n");
    second_solve = PETSC_TRUE;
    ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
  }
  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

  /* handle expansion step of Schur complement (if any) */
  if (second_solve) {
    ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
  }

  if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
    if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
      /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */
      ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
    }
    if (!mumps->scat_sol) { /* create scatter scat_sol */
      ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */
      for (i=0; i<mumps->id.lsol_loc; i++) {
        mumps->id.isol_loc[i] -= 1; /* change Fortran style to C style */
      }
      ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,mumps->id.isol_loc,PETSC_COPY_VALUES,&is_petsc);CHKERRQ(ierr);  /* to */
      ierr = VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr);
      ierr = ISDestroy(&is_iden);CHKERRQ(ierr);
      ierr = ISDestroy(&is_petsc);CHKERRQ(ierr);

      mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
    }

    ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }

  if (mumps->petsc_size > 1) {if (!mumps->myid) {ierr = VecRestoreArray(b_seq,&array);CHKERRQ(ierr);}}
  else {ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);}

  ierr = PetscLogFlops(2.0*mumps->id.RINFO(3));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Solve A^T x = b by toggling MUMPS ICNTL(9) around a regular MatSolve
   (ICNTL(9)=0 selects the transposed system; 1 restores the default A x = b) */
PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mumps->id.ICNTL(9) = 0;
  ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr);
  mumps->id.ICNTL(9) = 1;
  PetscFunctionReturn(0);
}

/* Solve A X = B for multiple right-hand sides; B may be dense or sparse (continues past
   this chunk) */
PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X)
{
  PetscErrorCode    ierr;
  Mat               Bt = NULL;
  PetscBool         flg, flgT;
  Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
  PetscInt          i,nrhs,M;
  PetscScalar       *array;
  const PetscScalar *rbray;
  PetscInt          lsol_loc,nlsol_loc,*isol_loc,*idxx,*isol_loc_save,iidx = 0;
  PetscScalar       *bray,*sol_loc,*sol_loc_save;
  IS                is_to,is_from;
  PetscInt          k,proc,j,m,myrstart;
  const PetscInt    *rstart;
  Vec               v_mpi,b_seq,msol_loc;
  VecScatter        scat_rhs,scat_sol;
  PetscScalar       *aa;
  PetscInt          spnr,*ia,*ja;
  Mat_MPIAIJ        *b = NULL;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");

  ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (flg) { /* dense B */
    if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution");
    mumps->id.ICNTL(20)= 0; /* dense RHS */
  } else { /* sparse B */
    ierr = PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);CHKERRQ(ierr);
    if (flgT) { /* input B is 
transpose of actural RHS matrix, 1028 because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */ 1029 ierr = MatTransposeGetMat(B,&Bt);CHKERRQ(ierr); 1030 } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix"); 1031 mumps->id.ICNTL(20)= 1; /* sparse RHS */ 1032 } 1033 1034 ierr = MatGetSize(B,&M,&nrhs);CHKERRQ(ierr); 1035 mumps->id.nrhs = nrhs; 1036 mumps->id.lrhs = M; 1037 mumps->id.rhs = NULL; 1038 1039 if (mumps->petsc_size == 1) { 1040 PetscScalar *aa; 1041 PetscInt spnr,*ia,*ja; 1042 PetscBool second_solve = PETSC_FALSE; 1043 1044 ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr); 1045 mumps->id.rhs = (MumpsScalar*)array; 1046 1047 if (!Bt) { /* dense B */ 1048 /* copy B to X */ 1049 ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr); 1050 ierr = PetscArraycpy(array,rbray,M*nrhs);CHKERRQ(ierr); 1051 ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr); 1052 } else { /* sparse B */ 1053 ierr = MatSeqAIJGetArray(Bt,&aa);CHKERRQ(ierr); 1054 ierr = MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1055 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 1056 /* mumps requires ia and ja start at 1! 
*/ 1057 mumps->id.irhs_ptr = ia; 1058 mumps->id.irhs_sparse = ja; 1059 mumps->id.nz_rhs = ia[spnr] - 1; 1060 mumps->id.rhs_sparse = (MumpsScalar*)aa; 1061 } 1062 /* handle condensation step of Schur complement (if any) */ 1063 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1064 second_solve = PETSC_TRUE; 1065 ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr); 1066 } 1067 /* solve phase */ 1068 /*-------------*/ 1069 mumps->id.job = JOB_SOLVE; 1070 PetscMUMPS_c(mumps); 1071 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1072 1073 /* handle expansion step of Schur complement (if any) */ 1074 if (second_solve) { 1075 ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr); 1076 } 1077 if (Bt) { /* sparse B */ 1078 ierr = MatSeqAIJRestoreArray(Bt,&aa);CHKERRQ(ierr); 1079 ierr = MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1080 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 1081 } 1082 ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr); 1083 PetscFunctionReturn(0); 1084 } 1085 1086 /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! 
--------*/ 1087 if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n"); 1088 1089 /* create msol_loc to hold mumps local solution */ 1090 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1091 sol_loc_save = (PetscScalar*)mumps->id.sol_loc; 1092 1093 lsol_loc = mumps->id.lsol_loc; 1094 nlsol_loc = nrhs*lsol_loc; /* length of sol_loc */ 1095 ierr = PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);CHKERRQ(ierr); 1096 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1097 mumps->id.isol_loc = isol_loc; 1098 1099 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);CHKERRQ(ierr); 1100 1101 if (!Bt) { /* dense B */ 1102 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1103 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1104 0, re-arrange B into desired order, which is a local operation. 1105 */ 1106 1107 /* scatter v_mpi to b_seq because MUMPS only supports centralized rhs */ 1108 /* wrap dense rhs matrix B into a vector v_mpi */ 1109 ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr); 1110 ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr); 1111 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr); 1112 ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr); 1113 1114 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! 
*/ 1115 if (!mumps->myid) { 1116 PetscInt *idx; 1117 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1118 ierr = PetscMalloc1(nrhs*M,&idx);CHKERRQ(ierr); 1119 ierr = MatGetOwnershipRanges(B,&rstart);CHKERRQ(ierr); 1120 k = 0; 1121 for (proc=0; proc<mumps->petsc_size; proc++){ 1122 for (j=0; j<nrhs; j++){ 1123 for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i; 1124 } 1125 } 1126 1127 ierr = VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);CHKERRQ(ierr); 1128 ierr = ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr); 1129 ierr = ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);CHKERRQ(ierr); 1130 } else { 1131 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);CHKERRQ(ierr); 1132 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);CHKERRQ(ierr); 1133 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);CHKERRQ(ierr); 1134 } 1135 ierr = VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);CHKERRQ(ierr); 1136 ierr = VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1137 ierr = ISDestroy(&is_to);CHKERRQ(ierr); 1138 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1139 ierr = VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1140 1141 if (!mumps->myid) { /* define rhs on the host */ 1142 ierr = VecGetArray(b_seq,&bray);CHKERRQ(ierr); 1143 mumps->id.rhs = (MumpsScalar*)bray; 1144 ierr = VecRestoreArray(b_seq,&bray);CHKERRQ(ierr); 1145 } 1146 1147 } else { /* sparse B */ 1148 b = (Mat_MPIAIJ*)Bt->data; 1149 1150 /* wrap dense X into a vector v_mpi */ 1151 ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr); 1152 ierr = MatDenseGetArray(X,&bray);CHKERRQ(ierr); 1153 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr); 1154 ierr = MatDenseRestoreArray(X,&bray);CHKERRQ(ierr); 1155 1156 if (!mumps->myid) { 1157 ierr = MatSeqAIJGetArray(b->A,&aa);CHKERRQ(ierr); 1158 ierr = 
MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1159 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 1160 /* mumps requires ia and ja start at 1! */ 1161 mumps->id.irhs_ptr = ia; 1162 mumps->id.irhs_sparse = ja; 1163 mumps->id.nz_rhs = ia[spnr] - 1; 1164 mumps->id.rhs_sparse = (MumpsScalar*)aa; 1165 } else { 1166 mumps->id.irhs_ptr = NULL; 1167 mumps->id.irhs_sparse = NULL; 1168 mumps->id.nz_rhs = 0; 1169 mumps->id.rhs_sparse = NULL; 1170 } 1171 } 1172 1173 /* solve phase */ 1174 /*-------------*/ 1175 mumps->id.job = JOB_SOLVE; 1176 PetscMUMPS_c(mumps); 1177 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1178 1179 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1180 ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr); 1181 ierr = VecPlaceArray(v_mpi,array);CHKERRQ(ierr); 1182 1183 /* create scatter scat_sol */ 1184 ierr = MatGetOwnershipRanges(X,&rstart);CHKERRQ(ierr); 1185 /* iidx: index for scatter mumps solution to petsc X */ 1186 1187 ierr = ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);CHKERRQ(ierr); 1188 ierr = PetscMalloc1(nlsol_loc,&idxx);CHKERRQ(ierr); 1189 for (i=0; i<lsol_loc; i++) { 1190 isol_loc[i] -= 1; /* change Fortran style to C style. 
isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1191 1192 for (proc=0; proc<mumps->petsc_size; proc++){ 1193 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) { 1194 myrstart = rstart[proc]; 1195 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1196 iidx = k + myrstart*nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1197 m = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */ 1198 break; 1199 } 1200 } 1201 1202 for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m; 1203 } 1204 ierr = ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr); 1205 ierr = VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);CHKERRQ(ierr); 1206 ierr = VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1207 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1208 ierr = ISDestroy(&is_to);CHKERRQ(ierr); 1209 ierr = VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1210 ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr); 1211 1212 /* free spaces */ 1213 mumps->id.sol_loc = (MumpsScalar*)sol_loc_save; 1214 mumps->id.isol_loc = isol_loc_save; 1215 1216 ierr = PetscFree2(sol_loc,isol_loc);CHKERRQ(ierr); 1217 ierr = PetscFree(idxx);CHKERRQ(ierr); 1218 ierr = VecDestroy(&msol_loc);CHKERRQ(ierr); 1219 ierr = VecDestroy(&v_mpi);CHKERRQ(ierr); 1220 if (Bt) { 1221 if (!mumps->myid) { 1222 b = (Mat_MPIAIJ*)Bt->data; 1223 ierr = MatSeqAIJRestoreArray(b->A,&aa);CHKERRQ(ierr); 1224 ierr = MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1225 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 1226 } 1227 } else { 1228 ierr = VecDestroy(&b_seq);CHKERRQ(ierr); 1229 ierr = VecScatterDestroy(&scat_rhs);CHKERRQ(ierr); 1230 } 1231 ierr = VecScatterDestroy(&scat_sol);CHKERRQ(ierr); 1232 ierr = 
PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));CHKERRQ(ierr); 1233 PetscFunctionReturn(0); 1234 } 1235 1236 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X) 1237 { 1238 PetscErrorCode ierr; 1239 PetscBool flg; 1240 Mat B; 1241 1242 PetscFunctionBegin; 1243 ierr = PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 1244 if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix"); 1245 1246 /* Create B=Bt^T that uses Bt's data structure */ 1247 ierr = MatCreateTranspose(Bt,&B);CHKERRQ(ierr); 1248 1249 ierr = MatMatSolve_MUMPS(A,B,X);CHKERRQ(ierr); 1250 ierr = MatDestroy(&B);CHKERRQ(ierr); 1251 PetscFunctionReturn(0); 1252 } 1253 1254 #if !defined(PETSC_USE_COMPLEX) 1255 /* 1256 input: 1257 F: numeric factor 1258 output: 1259 nneg: total number of negative pivots 1260 nzero: total number of zero pivots 1261 npos: (global dimension of F) - nneg - nzero 1262 */ 1263 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos) 1264 { 1265 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1266 PetscErrorCode ierr; 1267 PetscMPIInt size; 1268 1269 PetscFunctionBegin; 1270 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRQ(ierr); 1271 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1272 if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. 
-mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",mumps->id.INFOG(13)); 1273 1274 if (nneg) *nneg = mumps->id.INFOG(12); 1275 if (nzero || npos) { 1276 if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1277 if (nzero) *nzero = mumps->id.INFOG(28); 1278 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1279 } 1280 PetscFunctionReturn(0); 1281 } 1282 #endif 1283 1284 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps) 1285 { 1286 PetscErrorCode ierr; 1287 PetscInt i,nz=0,*irn,*jcn=0; 1288 PetscScalar *val=0; 1289 PetscMPIInt mpinz,*recvcount=NULL,*displs=NULL; 1290 1291 PetscFunctionBegin; 1292 if (mumps->omp_comm_size > 1) { 1293 if (reuse == MAT_INITIAL_MATRIX) { 1294 /* master first gathers counts of nonzeros to receive */ 1295 if (mumps->is_omp_master) { ierr = PetscMalloc2(mumps->omp_comm_size,&recvcount,mumps->omp_comm_size,&displs);CHKERRQ(ierr); } 1296 ierr = PetscMPIIntCast(mumps->nz,&mpinz);CHKERRQ(ierr); 1297 ierr = MPI_Gather(&mpinz,1,MPI_INT,recvcount,1,MPI_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1298 1299 /* master allocates memory to receive nonzeros */ 1300 if (mumps->is_omp_master) { 1301 displs[0] = 0; 1302 for (i=1; i<mumps->omp_comm_size; i++) displs[i] = displs[i-1] + recvcount[i-1]; 1303 nz = displs[mumps->omp_comm_size-1] + recvcount[mumps->omp_comm_size-1]; 1304 ierr = PetscMalloc(2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar),&irn);CHKERRQ(ierr); 1305 jcn = irn + nz; 1306 val = (PetscScalar*)(jcn + nz); 1307 } 1308 1309 /* save the gatherv plan */ 1310 mumps->mpinz = mpinz; /* used as send count */ 1311 mumps->recvcount = recvcount; 1312 mumps->displs = displs; 1313 1314 /* master gathers nonzeros */ 1315 ierr = MPI_Gatherv(mumps->irn,mpinz,MPIU_INT,irn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1316 ierr = 
MPI_Gatherv(mumps->jcn,mpinz,MPIU_INT,jcn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1317 ierr = MPI_Gatherv(mumps->val,mpinz,MPIU_SCALAR,val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1318 1319 /* master frees its row/col/val and replaces them with bigger arrays */ 1320 if (mumps->is_omp_master) { 1321 ierr = PetscFree(mumps->irn);CHKERRQ(ierr); /* irn/jcn/val are allocated together so free only irn */ 1322 mumps->nz = nz; /* it is a sum of mpinz over omp_comm */ 1323 mumps->irn = irn; 1324 mumps->jcn = jcn; 1325 mumps->val = val; 1326 } 1327 } else { 1328 ierr = MPI_Gatherv((mumps->is_omp_master?MPI_IN_PLACE:mumps->val),mumps->mpinz,MPIU_SCALAR,mumps->val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1329 } 1330 } 1331 PetscFunctionReturn(0); 1332 } 1333 1334 PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info) 1335 { 1336 Mat_MUMPS *mumps =(Mat_MUMPS*)(F)->data; 1337 PetscErrorCode ierr; 1338 PetscBool isMPIAIJ; 1339 1340 PetscFunctionBegin; 1341 if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) { 1342 if (mumps->id.INFOG(1) == -6) { 1343 ierr = PetscInfo2(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1344 } 1345 ierr = PetscInfo2(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1346 PetscFunctionReturn(0); 1347 } 1348 1349 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1350 ierr = MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);CHKERRQ(ierr); 1351 1352 /* numerical factorization phase */ 1353 /*-------------------------------*/ 1354 mumps->id.job = JOB_FACTNUMERIC; 1355 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 
1356 if (!mumps->myid) { 1357 mumps->id.a = (MumpsScalar*)mumps->val; 1358 } 1359 } else { 1360 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1361 } 1362 PetscMUMPS_c(mumps); 1363 if (mumps->id.INFOG(1) < 0) { 1364 if (A->erroriffailure) { 1365 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 1366 } else { 1367 if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */ 1368 ierr = PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1369 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1370 } else if (mumps->id.INFOG(1) == -13) { 1371 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1372 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1373 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10) ) { 1374 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray \n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1375 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1376 } else { 1377 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1378 F->factorerrortype = MAT_FACTOR_OTHER; 1379 } 1380 } 1381 } 1382 if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d\n",mumps->id.INFOG(16)); 1383 1384 F->assembled = PETSC_TRUE; 1385 mumps->matstruc = SAME_NONZERO_PATTERN; 1386 if (F->schur) { /* reset Schur status to unfactored */ 1387 #if defined(PETSC_HAVE_CUDA) 1388 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 1389 #endif 1390 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1391 mumps->id.ICNTL(19) = 2; 
1392 ierr = MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);CHKERRQ(ierr); 1393 } 1394 ierr = MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);CHKERRQ(ierr); 1395 } 1396 1397 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 1398 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 1399 1400 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 1401 if (mumps->petsc_size > 1) { 1402 PetscInt lsol_loc; 1403 PetscScalar *sol_loc; 1404 1405 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr); 1406 1407 /* distributed solution; Create x_seq=sol_loc for repeated use */ 1408 if (mumps->x_seq) { 1409 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 1410 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 1411 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 1412 } 1413 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1414 ierr = PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);CHKERRQ(ierr); 1415 mumps->id.lsol_loc = lsol_loc; 1416 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1417 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr); 1418 } 1419 ierr = PetscLogFlops(mumps->id.RINFO(2));CHKERRQ(ierr); 1420 PetscFunctionReturn(0); 1421 } 1422 1423 /* Sets MUMPS options from the options database */ 1424 PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A) 1425 { 1426 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1427 PetscErrorCode ierr; 1428 PetscInt icntl,info[80],i,ninfo=80; 1429 PetscBool flg; 1430 1431 PetscFunctionBegin; 1432 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 1433 ierr = PetscOptionsInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr); 1434 if (flg) mumps->id.ICNTL(1) = icntl; 1435 ierr = 
PetscOptionsInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr); 1436 if (flg) mumps->id.ICNTL(2) = icntl; 1437 ierr = PetscOptionsInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr); 1438 if (flg) mumps->id.ICNTL(3) = icntl; 1439 1440 ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr); 1441 if (flg) mumps->id.ICNTL(4) = icntl; 1442 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 1443 1444 ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr); 1445 if (flg) mumps->id.ICNTL(6) = icntl; 1446 1447 ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 
3=Scotch, 4=PORD, 5=Metis","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr); 1448 if (flg) { 1449 if (icntl== 1 && mumps->petsc_size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n"); 1450 else mumps->id.ICNTL(7) = icntl; 1451 } 1452 1453 ierr = PetscOptionsInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr); 1454 /* ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL);CHKERRQ(ierr); handled by MatSolveTranspose_MUMPS() */ 1455 ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr); 1456 ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr); 1457 ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr); 1458 ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr); 1459 ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr); 1460 ierr = PetscOptionsInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr); 1461 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 1462 ierr = MatDestroy(&F->schur);CHKERRQ(ierr); 1463 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 1464 } 1465 /* ierr = 
PetscOptionsInt("-mat_mumps_icntl_20","ICNTL(20): the format (dense or sparse) of the right-hand sides","None",mumps->id.ICNTL(20),&mumps->id.ICNTL(20),NULL);CHKERRQ(ierr); -- sparse rhs is not supported in PETSc API */ 1466 /* ierr = PetscOptionsInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL);CHKERRQ(ierr); we only use distributed solution vector */ 1467 1468 ierr = PetscOptionsInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr); 1469 ierr = PetscOptionsInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr); 1470 ierr = PetscOptionsInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr); 1471 if (mumps->id.ICNTL(24)) { 1472 mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ 1473 } 1474 1475 ierr = PetscOptionsInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr); 1476 ierr = PetscOptionsInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr); 1477 ierr = PetscOptionsInt("-mat_mumps_icntl_27","ICNTL(27): the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr); 1478 ierr = PetscOptionsInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr); 1479 ierr = 
PetscOptionsInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr); 1480 /* ierr = PetscOptionsInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */ 1481 ierr = PetscOptionsInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr); 1482 /* ierr = PetscOptionsInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr); -- not supported by PETSc API */ 1483 ierr = PetscOptionsInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr); 1484 ierr = PetscOptionsInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr); 1485 ierr = PetscOptionsInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);CHKERRQ(ierr); 1486 ierr = PetscOptionsInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);CHKERRQ(ierr); 1487 1488 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr); 1489 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr); 1490 ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr); 1491 ierr = 
PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr); 1492 ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr); 1493 ierr = PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);CHKERRQ(ierr); 1494 1495 ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, 256, NULL);CHKERRQ(ierr); 1496 1497 ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr); 1498 if (ninfo) { 1499 if (ninfo > 80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %d must <= 80\n",ninfo); 1500 ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr); 1501 mumps->ninfo = ninfo; 1502 for (i=0; i<ninfo; i++) { 1503 if (info[i] < 0 || info[i]>80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %d must between 1 and 80\n",ninfo); 1504 else mumps->info[i] = info[i]; 1505 } 1506 } 1507 1508 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1509 PetscFunctionReturn(0); 1510 } 1511 1512 PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps) 1513 { 1514 PetscErrorCode ierr; 1515 PetscInt nthreads=0; 1516 1517 PetscFunctionBegin; 1518 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1519 ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRQ(ierr); 1520 ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRQ(ierr); /* so that code like "if (!myid)" still works even if mumps_comm is different */ 1521 1522 ierr = PetscOptionsHasName(NULL,NULL,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr); 1523 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1524 ierr = 
PetscOptionsGetInt(NULL,NULL,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr); 1525 if (mumps->use_petsc_omp_support) { 1526 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1527 ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr); 1528 ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr); 1529 #else 1530 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -mat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual\n"); 1531 #endif 1532 } else { 1533 mumps->omp_comm = PETSC_COMM_SELF; 1534 mumps->mumps_comm = mumps->petsc_comm; 1535 mumps->is_omp_master = PETSC_TRUE; 1536 } 1537 ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRQ(ierr); 1538 1539 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 1540 mumps->id.job = JOB_INIT; 1541 mumps->id.par = 1; /* host participates factorizaton and solve */ 1542 mumps->id.sym = mumps->sym; 1543 1544 PetscMUMPS_c(mumps); 1545 1546 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 1547 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 
1548 */ 1549 ierr = MPI_Bcast(mumps->id.icntl,60,MPIU_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 Manual Section 9 */ 1550 ierr = MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); 1551 1552 mumps->scat_rhs = NULL; 1553 mumps->scat_sol = NULL; 1554 1555 /* set PETSc-MUMPS default options - override MUMPS default */ 1556 mumps->id.ICNTL(3) = 0; 1557 mumps->id.ICNTL(4) = 0; 1558 if (mumps->petsc_size == 1) { 1559 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 1560 } else { 1561 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 1562 mumps->id.ICNTL(20) = 0; /* rhs is in dense format */ 1563 mumps->id.ICNTL(21) = 1; /* distributed solution */ 1564 } 1565 1566 /* schur */ 1567 mumps->id.size_schur = 0; 1568 mumps->id.listvar_schur = NULL; 1569 mumps->id.schur = NULL; 1570 mumps->sizeredrhs = 0; 1571 mumps->schur_sol = NULL; 1572 mumps->schur_sizesol = 0; 1573 PetscFunctionReturn(0); 1574 } 1575 1576 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps) 1577 { 1578 PetscErrorCode ierr; 1579 1580 PetscFunctionBegin; 1581 ierr = MPI_Bcast(mumps->id.infog, 80,MPIU_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 manual p82 */ 1582 ierr = MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); 1583 if (mumps->id.INFOG(1) < 0) { 1584 if (A->erroriffailure) { 1585 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1586 } else { 1587 if (mumps->id.INFOG(1) == -6) { 1588 ierr = PetscInfo2(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1589 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 1590 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 1591 ierr = PetscInfo2(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1592 
F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1593 } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) { 1594 ierr = PetscInfo(F,"Empty matrix\n");CHKERRQ(ierr); 1595 } else { 1596 ierr = PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1597 F->factorerrortype = MAT_FACTOR_OTHER; 1598 } 1599 } 1600 } 1601 PetscFunctionReturn(0); 1602 } 1603 1604 /* Note Petsc r(=c) permutation is used when mumps->id.ICNTL(7)==1 with centralized assembled matrix input; otherwise r and c are ignored */ 1605 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1606 { 1607 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1608 PetscErrorCode ierr; 1609 Vec b; 1610 const PetscInt M = A->rmap->N; 1611 1612 PetscFunctionBegin; 1613 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1614 1615 /* Set MUMPS options from the options database */ 1616 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1617 1618 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1619 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1620 1621 /* analysis phase */ 1622 /*----------------*/ 1623 mumps->id.job = JOB_FACTSYMBOLIC; 1624 mumps->id.n = M; 1625 switch (mumps->id.ICNTL(18)) { 1626 case 0: /* centralized assembled matrix input */ 1627 if (!mumps->myid) { 1628 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1629 if (mumps->id.ICNTL(6)>1) { 1630 mumps->id.a = (MumpsScalar*)mumps->val; 1631 } 1632 if (mumps->id.ICNTL(7) == 1) { /* use user-provide matrix ordering - assuming r = c ordering */ 1633 /* 1634 PetscBool flag; 1635 ierr = ISEqual(r,c,&flag);CHKERRQ(ierr); 1636 if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"row_perm != col_perm"); 1637 ierr = ISView(r,PETSC_VIEWER_STDOUT_SELF); 1638 */ 1639 if (!mumps->myid) { 1640 const PetscInt *idx; 1641 
          PetscInt i,*perm_in;

          ierr = PetscMalloc1(M,&perm_in);CHKERRQ(ierr);
          ierr = ISGetIndices(r,&idx);CHKERRQ(ierr);

          /* ownership of perm_in passes to MUMPS via id.perm_in; freed elsewhere at factor destruction */
          mumps->id.perm_in = perm_in;
          for (i=0; i<M; i++) perm_in[i] = idx[i]+1; /* perm_in[]: start from 1, not 0! */
          ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr);
        }
      }
    }
    break;
  case 3:  /* distributed assembled matrix input (size>1) */
    mumps->id.nz_loc = mumps->nz;
    mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn;
    if (mumps->id.ICNTL(6)>1) {
      mumps->id.a_loc = (MumpsScalar*)mumps->val;
    }
    /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
    ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr);
    ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr);
    ierr = VecDestroy(&b);CHKERRQ(ierr);
    break;
  }
  PetscMUMPS_c(mumps);
  ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr);

  F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
  F->ops->solve             = MatSolve_MUMPS;
  F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
  F->ops->matsolve          = MatMatSolve_MUMPS;
  F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
  PetscFunctionReturn(0);
}

/* Symbolic LU factorization for BAIJ matrices: convert to triples, run the MUMPS analysis phase
   (JOB_FACTSYMBOLIC), and install the numeric/solve function pointers on F.
   Note the Petsc r and c permutations are ignored */
PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
  PetscErrorCode ierr;
  Vec            b;
  const PetscInt M = A->rmap->N;

  PetscFunctionBegin;
  mumps->matstruc = DIFFERENT_NONZERO_PATTERN;

  /* Set MUMPS options from the options database */
  ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr);

  ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr);
  ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr);

  /* analysis phase */
  /*----------------*/
  mumps->id.job = JOB_FACTSYMBOLIC;
  mumps->id.n   = M;
  switch (mumps->id.ICNTL(18)) {
  case 0:  /* centralized assembled matrix input */
    if (!mumps->myid) {
      mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn;
      if (mumps->id.ICNTL(6)>1) {
        mumps->id.a = (MumpsScalar*)mumps->val;
      }
    }
    break;
  case 3:  /* distributed assembled matrix input (size>1) */
    mumps->id.nz_loc = mumps->nz;
    mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn;
    if (mumps->id.ICNTL(6)>1) {
      mumps->id.a_loc = (MumpsScalar*)mumps->val;
    }
    /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
    ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr);
    ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr);
    ierr = VecDestroy(&b);CHKERRQ(ierr);
    break;
  }
  PetscMUMPS_c(mumps);
  ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr);

  F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
  F->ops->solve           = MatSolve_MUMPS;
  F->ops->solvetranspose  = MatSolveTranspose_MUMPS;
  PetscFunctionReturn(0);
}

/* Symbolic Cholesky factorization: same analysis flow as LU above, but installs
   choleskyfactornumeric and (real case) inertia query.
   Note the Petsc r permutation and factor info are ignored */
PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info)
{
  Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
  PetscErrorCode ierr;
  Vec            b;
  const PetscInt M = A->rmap->N;

  PetscFunctionBegin;
  mumps->matstruc = DIFFERENT_NONZERO_PATTERN;

  /* Set MUMPS options from the options database */
  ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr);

  ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr);
  ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr);

  /* analysis
phase */
  /*----------------*/
  mumps->id.job = JOB_FACTSYMBOLIC;
  mumps->id.n   = M;
  switch (mumps->id.ICNTL(18)) {
  case 0:  /* centralized assembled matrix input */
    if (!mumps->myid) {
      mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn;
      if (mumps->id.ICNTL(6)>1) {
        mumps->id.a = (MumpsScalar*)mumps->val;
      }
    }
    break;
  case 3:  /* distributed assembled matrix input (size>1) */
    mumps->id.nz_loc = mumps->nz;
    mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn;
    if (mumps->id.ICNTL(6)>1) {
      mumps->id.a_loc = (MumpsScalar*)mumps->val;
    }
    /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
    ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr);
    ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr);
    ierr = VecDestroy(&b);CHKERRQ(ierr);
    break;
  }
  PetscMUMPS_c(mumps);
  ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr);

  F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
  F->ops->solve                 = MatSolve_MUMPS;
  F->ops->solvetranspose        = MatSolve_MUMPS;  /* symmetric factor: transpose solve == solve */
  F->ops->matsolve              = MatMatSolve_MUMPS;
  F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
#if defined(PETSC_USE_COMPLEX)
  F->ops->getinertia = NULL;   /* inertia is only meaningful for real symmetric matrices */
#else
  F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
#endif
  PetscFunctionReturn(0);
}

/* ASCII viewer for a MUMPS factor: with the PETSC_VIEWER_ASCII_INFO format, print the ICNTL/CNTL
   control values, per-rank RINFO/INFO diagnostics (synchronized output) and the host's INFOG/RINFOG
   statistics.  Silently returns for non-MUMPS matrices and non-ASCII viewers. */
PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;
  Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;

  PetscFunctionBegin;
  /* check if matrix is mumps type */
  if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0);

  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",mumps->id.sym);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",mumps->id.par);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d \n",mumps->id.ICNTL(1));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d \n",mumps->id.ICNTL(2));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d \n",mumps->id.ICNTL(3));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",mumps->id.ICNTL(4));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",mumps->id.ICNTL(5));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",mumps->id.ICNTL(6));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d \n",mumps->id.ICNTL(7));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d \n",mumps->id.ICNTL(8));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",mumps->id.ICNTL(10));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",mumps->id.ICNTL(11));CHKERRQ(ierr);
      if (mumps->id.ICNTL(11)>0) {
        /* error-analysis statistics are only valid when ICNTL(11) enabled them */
        ierr = PetscViewerASCIIPrintf(viewer," RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr);
      }
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",mumps->id.ICNTL(12));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (efficiency control): %d \n",mumps->id.ICNTL(13));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",mumps->id.ICNTL(14));CHKERRQ(ierr);
      /* ICNTL(15-17) not used */
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",mumps->id.ICNTL(18));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d \n",mumps->id.ICNTL(19));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(20) (rhs sparse pattern): %d \n",mumps->id.ICNTL(20));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d \n",mumps->id.ICNTL(21));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d \n",mumps->id.ICNTL(22));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory can be allocated locally):%d \n",mumps->id.ICNTL(23));CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d \n",mumps->id.ICNTL(24));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d \n",mumps->id.ICNTL(25));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for rhs or solution): %d \n",mumps->id.ICNTL(26));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(27) (experimental parameter): %d \n",mumps->id.ICNTL(27));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d \n",mumps->id.ICNTL(28));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d \n",mumps->id.ICNTL(29));CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d \n",mumps->id.ICNTL(30));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(31) (factors is discarded in the solve phase): %d \n",mumps->id.ICNTL(31));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d \n",mumps->id.ICNTL(33));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d \n",mumps->id.ICNTL(35));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(36) (choice of BLR factorization variant): %d \n",mumps->id.ICNTL(36));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(38) (estimated compression rate of LU factors): %d \n",mumps->id.ICNTL(38));CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," CNTL(7) (dropping parameter for BLR): %g \n",mumps->id.CNTL(7));CHKERRQ(ierr);

      /* information local to each processor */
      ierr = PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] %g \n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] %d \n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] %d \n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] %d \n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);

      if (mumps->ninfo && mumps->ninfo <= 80){
        PetscInt i;
        /* user-requested extra INFO entries (set via options, validated against the 80-entry array) */
        for (i=0; i<mumps->ninfo; i++){
          ierr = PetscViewerASCIIPrintf(viewer, " INFO(%d): \n",mumps->info[i]);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] %d \n",mumps->myid,mumps->id.INFO(mumps->info[i]));CHKERRQ(ierr);
          ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
        }
      }
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);

      if (!mumps->myid) { /* information from the host */
        ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr);

        ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(3));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(4));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",mumps->id.INFOG(5));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",mumps->id.INFOG(6));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively use after analysis): %d \n",mumps->id.INFOG(7));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",mumps->id.INFOG(8));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",mumps->id.INFOG(9));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d \n",mumps->id.INFOG(10));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d \n",mumps->id.INFOG(11));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",mumps->id.INFOG(12));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",mumps->id.INFOG(13));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",mumps->id.INFOG(14));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",mumps->id.INFOG(15));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",mumps->id.INFOG(16));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",mumps->id.INFOG(17));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",mumps->id.INFOG(18));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",mumps->id.INFOG(19));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",mumps->id.INFOG(20));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",mumps->id.INFOG(21));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",mumps->id.INFOG(22));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",mumps->id.INFOG(23));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",mumps->id.INFOG(24));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",mumps->id.INFOG(25));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(36));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d \n",mumps->id.INFOG(37));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(38));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d \n",mumps->id.INFOG(39));CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

/* Fill a MatInfo from the MUMPS factor: only the factor nonzero count (INFOG(20)) is available,
   so it is reported as both nz_allocated and nz_used; the remaining fields are zeroed.
   flag is ignored because all counts here are global. */
PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)A->data;

  PetscFunctionBegin;
  info->block_size        = 1.0;
  info->nz_allocated      = mumps->id.INFOG(20);
  info->nz_used           = mumps->id.INFOG(20);
  info->nz_unneeded       = 0.0;
  info->assemblies        = 0.0;
  info->mallocs           = 0.0;
  info->memory            = 0.0;
  info->fill_ratio_given  = 0;
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------------------------*/
/* Register the index set "is" as the Schur complement block with MUMPS: allocate the dense
   Schur matrix F->schur, hand its storage and the 1-based variable list to MUMPS, and pick
   the appropriate ICNTL(19) return format. */
PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
{
  Mat_MUMPS         *mumps =(Mat_MUMPS*)F->data;
  const PetscScalar *arr;
  const PetscInt    *idxs;
  PetscInt          size,i;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  ierr =
ISGetLocalSize(is,&size);CHKERRQ(ierr);
  if (mumps->petsc_size > 1) {
    PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */

    ls   = mumps->myid ? (size ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
    ierr = MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);CHKERRQ(ierr);
    if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc\n");
  }

  /* Schur complement matrix */
  ierr = MatDestroy(&F->schur);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur);CHKERRQ(ierr);
  /* const is cast away: MUMPS itself writes the Schur entries into this buffer during factorization */
  ierr = MatDenseGetArrayRead(F->schur,&arr);CHKERRQ(ierr);
  mumps->id.schur      = (MumpsScalar*)arr;
  mumps->id.size_schur = size;
  mumps->id.schur_lld  = size;
  ierr = MatDenseRestoreArrayRead(F->schur,&arr);CHKERRQ(ierr);
  if (mumps->sym == 1) {
    /* sym==1 is MUMPS's SPD mode, so the Schur complement is SPD as well */
    ierr = MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);
  }

  /* MUMPS expects Fortran style indices */
  ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&mumps->id.listvar_schur);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr);
  ierr = PetscArraycpy(mumps->id.listvar_schur,idxs,size);CHKERRQ(ierr);
  for (i=0;i<size;i++) mumps->id.listvar_schur[i]++;
  ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr);
  if (mumps->petsc_size > 1) {
    mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */
  } else {
    if (F->factortype == MAT_FACTOR_LU) {
      mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
    } else {
      mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
    }
  }
  /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
  mumps->id.ICNTL(26) = -1;
  PetscFunctionReturn(0);
}

/*
-------------------------------------------------------------------------------------------*/
/* Copy the MUMPS-computed Schur complement into a new sequential dense matrix *S, converting
   from whatever storage layout MUMPS used (ICNTL(19): 1 = by rows (or lower-tri by rows when
   symmetric), 2 = lower triangular by columns, 3 = full by columns) to PETSc's column-major
   full storage.  Caller owns *S. */
PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S)
{
  Mat          St;
  Mat_MUMPS    *mumps =(Mat_MUMPS*)F->data;
  PetscScalar  *array;
#if defined(PETSC_USE_COMPLEX)
  PetscScalar  im = PetscSqrtScalar((PetscScalar)-1.0);  /* imaginary unit, to rebuild complex values from MUMPS {r,i} pairs */
#endif
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
  ierr = MatCreate(PETSC_COMM_SELF,&St);CHKERRQ(ierr);
  ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);CHKERRQ(ierr);
  ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(St);CHKERRQ(ierr);
  ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr);
  if (!mumps->sym) { /* MUMPS always returns a full matrix */
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
      PetscInt i,j,N=mumps->id.size_schur;
      for (i=0;i<N;i++) {
        for (j=0;j<N;j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i*N+j];
#else
          PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
#endif
          array[j*N+i] = val;  /* transpose: row-major source to column-major dest */
        }
      }
    } else { /* stored by columns */
      ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr);
    }
  } else { /* either full or lower-triangular (not packed) */
    if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
      PetscInt i,j,N=mumps->id.size_schur;
      for (i=0;i<N;i++) {
        for (j=i;j<N;j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i*N+j];
#else
          PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
#endif
          array[i*N+j] = val;  /* symmetric expansion: mirror into both triangles */
          array[j*N+i] = val;
        }
      }
    } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
      ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr);
    } else { /* ICNTL(19) == 1 lower triangular stored by rows */
      PetscInt i,j,N=mumps->id.size_schur;
      for (i=0;i<N;i++) {
        for (j=0;j<i+1;j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i*N+j];
#else
          PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
#endif
          array[i*N+j] = val;  /* symmetric expansion: mirror into both triangles */
          array[j*N+i] = val;
        }
      }
    }
  }
  ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr);
  *S   = St;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------------------------*/
/* Type-specific setter behind MatMumpsSetIcntl(): stores directly into the MUMPS control array */
PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  mumps->id.ICNTL(icntl) = ival;
  PetscFunctionReturn(0);
}

/* Type-specific getter behind MatMumpsGetIcntl() */
PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  *ival = mumps->id.ICNTL(icntl);
  PetscFunctionReturn(0);
}

/*@
  MatMumpsSetIcntl - Set MUMPS parameter ICNTL()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
.  icntl - index of MUMPS parameter array ICNTL()
-  ival - value of MUMPS ICNTL(icntl)

  Options Database:
.   -mat_mumps_icntl_<icntl> <ival>

   Level: beginner

   References:
.
MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  /* the ICNTL array only exists on a factored matrix created through MatGetFactor() */
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidLogicalCollectiveInt(F,icntl,2);
  PetscValidLogicalCollectiveInt(F,ival,3);
  ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
  MatMumpsGetIcntl - Get MUMPS parameter ICNTL()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  icntl - index of MUMPS parameter array ICNTL()

  Output Parameter:
.  ival - value of MUMPS ICNTL(icntl)

   Level: beginner

   References:
.     MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidLogicalCollectiveInt(F,icntl,2);
  PetscValidIntPointer(ival,3);
  ierr = PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------------------------*/
/* Type-specific setter behind MatMumpsSetCntl(): stores directly into the MUMPS real control array */
PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  mumps->id.CNTL(icntl) = val;
  PetscFunctionReturn(0);
}

/* Type-specific getter behind MatMumpsGetCntl() */
PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  *val = mumps->id.CNTL(icntl);
  PetscFunctionReturn(0);
}

/*@
  MatMumpsSetCntl - Set MUMPS parameter CNTL()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
.  icntl - index of MUMPS parameter array CNTL()
-  val - value of MUMPS CNTL(icntl)

  Options Database:
.   -mat_mumps_cntl_<icntl> <val>

   Level: beginner

   References:
.
   MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  /* CNTL() only exists on a factored matrix obtained through the MUMPS interface */
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidLogicalCollectiveInt(F,icntl,2);
  PetscValidLogicalCollectiveReal(F,val,3);
  /* PetscTryMethod(): quietly a no-op if F is not a MUMPS factor */
  ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
  MatMumpsGetCntl - Get MUMPS parameter CNTL()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  icntl - index of MUMPS parameter array CNTL()

  Output Parameter:
.  val - value of MUMPS CNTL(icntl)

   Level: beginner

   References:
.
MUMPS Users' Guide 2225 2226 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2227 @*/ 2228 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val) 2229 { 2230 PetscErrorCode ierr; 2231 2232 PetscFunctionBegin; 2233 PetscValidType(F,1); 2234 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2235 PetscValidLogicalCollectiveInt(F,icntl,2); 2236 PetscValidRealPointer(val,3); 2237 ierr = PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2238 PetscFunctionReturn(0); 2239 } 2240 2241 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info) 2242 { 2243 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2244 2245 PetscFunctionBegin; 2246 *info = mumps->id.INFO(icntl); 2247 PetscFunctionReturn(0); 2248 } 2249 2250 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog) 2251 { 2252 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2253 2254 PetscFunctionBegin; 2255 *infog = mumps->id.INFOG(icntl); 2256 PetscFunctionReturn(0); 2257 } 2258 2259 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo) 2260 { 2261 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2262 2263 PetscFunctionBegin; 2264 *rinfo = mumps->id.RINFO(icntl); 2265 PetscFunctionReturn(0); 2266 } 2267 2268 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog) 2269 { 2270 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2271 2272 PetscFunctionBegin; 2273 *rinfog = mumps->id.RINFOG(icntl); 2274 PetscFunctionReturn(0); 2275 } 2276 2277 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS) 2278 { 2279 PetscErrorCode ierr; 2280 Mat Bt = NULL,Btseq = NULL; 2281 PetscBool flg; 2282 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2283 PetscScalar *aa; 2284 PetscInt spnr,*ia,*ja,M,nrhs; 2285 2286 PetscFunctionBegin; 2287 
PetscValidIntPointer(spRHS,2); 2288 ierr = PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);CHKERRQ(ierr); 2289 if (flg) { 2290 ierr = MatTransposeGetMat(spRHS,&Bt);CHKERRQ(ierr); 2291 } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be type MATTRANSPOSEMAT matrix"); 2292 2293 ierr = MatMumpsSetIcntl(F,30,1);CHKERRQ(ierr); 2294 2295 if (mumps->petsc_size > 1) { 2296 Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data; 2297 Btseq = b->A; 2298 } else { 2299 Btseq = Bt; 2300 } 2301 2302 ierr = MatGetSize(spRHS,&M,&nrhs);CHKERRQ(ierr); 2303 mumps->id.nrhs = nrhs; 2304 mumps->id.lrhs = M; 2305 mumps->id.rhs = NULL; 2306 2307 if (!mumps->myid) { 2308 ierr = MatSeqAIJGetArray(Btseq,&aa);CHKERRQ(ierr); 2309 ierr = MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2310 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2311 2312 mumps->id.irhs_ptr = ia; 2313 mumps->id.irhs_sparse = ja; 2314 mumps->id.nz_rhs = ia[spnr] - 1; 2315 mumps->id.rhs_sparse = (MumpsScalar*)aa; 2316 } else { 2317 mumps->id.irhs_ptr = NULL; 2318 mumps->id.irhs_sparse = NULL; 2319 mumps->id.nz_rhs = 0; 2320 mumps->id.rhs_sparse = NULL; 2321 } 2322 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2323 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2324 2325 /* solve phase */ 2326 /*-------------*/ 2327 mumps->id.job = JOB_SOLVE; 2328 PetscMUMPS_c(mumps); 2329 if (mumps->id.INFOG(1) < 0) 2330 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 2331 2332 if (!mumps->myid) { 2333 ierr = MatSeqAIJRestoreArray(Btseq,&aa);CHKERRQ(ierr); 2334 ierr = MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2335 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2336 } 
2337 PetscFunctionReturn(0); 2338 } 2339 2340 /*@ 2341 MatMumpsGetInverse - Get user-specified set of entries in inverse of A 2342 2343 Logically Collective on Mat 2344 2345 Input Parameters: 2346 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2347 - spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0] 2348 2349 Output Parameter: 2350 . spRHS - requested entries of inverse of A 2351 2352 Level: beginner 2353 2354 References: 2355 . MUMPS Users' Guide 2356 2357 .seealso: MatGetFactor(), MatCreateTranspose() 2358 @*/ 2359 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS) 2360 { 2361 PetscErrorCode ierr; 2362 2363 PetscFunctionBegin; 2364 PetscValidType(F,1); 2365 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2366 ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr); 2367 PetscFunctionReturn(0); 2368 } 2369 2370 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST) 2371 { 2372 PetscErrorCode ierr; 2373 Mat spRHS; 2374 2375 PetscFunctionBegin; 2376 ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr); 2377 ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr); 2378 ierr = MatDestroy(&spRHS);CHKERRQ(ierr); 2379 PetscFunctionReturn(0); 2380 } 2381 2382 /*@ 2383 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T 2384 2385 Logically Collective on Mat 2386 2387 Input Parameters: 2388 + F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface 2389 - spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0] 2390 2391 Output Parameter: 2392 . spRHST - requested entries of inverse of A^T 2393 2394 Level: beginner 2395 2396 References: 2397 . 
   MUMPS Users' Guide

.seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse()
@*/
PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST)
{
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  /* unlike MatMumpsGetInverse(), the pattern is given directly as an AIJ matrix (seq or MPI) */
  ierr = PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix");

  ierr = PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
  MatMumpsGetInfo - Get MUMPS parameter INFO()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  icntl - index of MUMPS parameter array INFO()

  Output Parameter:
.  ival - value of MUMPS INFO(icntl)

   Level: beginner

   References:
.
   MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  /* INFO() only exists on a factored matrix obtained through the MUMPS interface */
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidIntPointer(ival,3);
  ierr = PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
  MatMumpsGetInfog - Get MUMPS parameter INFOG()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  icntl - index of MUMPS parameter array INFOG()

  Output Parameter:
.  ival - value of MUMPS INFOG(icntl)

   Level: beginner

   References:
.
   MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  /* INFOG() only exists on a factored matrix obtained through the MUMPS interface */
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidIntPointer(ival,3);
  ierr = PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
  MatMumpsGetRinfo - Get MUMPS parameter RINFO()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  icntl - index of MUMPS parameter array RINFO()

  Output Parameter:
.  val - value of MUMPS RINFO(icntl)

   Level: beginner

   References:
.
   MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  /* RINFO() only exists on a factored matrix obtained through the MUMPS interface */
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidRealPointer(val,3);
  ierr = PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
  MatMumpsGetRinfog - Get MUMPS parameter RINFOG()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  icntl - index of MUMPS parameter array RINFOG()

  Output Parameter:
.  val - value of MUMPS RINFOG(icntl)

   Level: beginner

   References:
.  MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  /* RINFOG() only exists on a factored matrix obtained through the MUMPS interface */
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidRealPointer(val,3);
  ierr = PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
  MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
  distributed and sequential matrices via the external package MUMPS.
2543 2544 Works with MATAIJ and MATSBAIJ matrices 2545 2546 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 2547 2548 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below. 2549 2550 Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver 2551 2552 Options Database Keys: 2553 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 2554 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 2555 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 2556 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 2557 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 2558 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis 2559 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 2560 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 2561 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 2562 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 2563 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 2564 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 2565 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 2566 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 2567 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can allocate per processor 2568 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 2569 . 
-mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis
.  -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix
.  -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering
.  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
.  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
.  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
.  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
.  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
.  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
.  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
.  -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold
.  -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement
.  -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
.  -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
.  -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
.  -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
-  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
                                   Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.

  Level: beginner

    Notes:
    MUMPS Cholesky does not handle (complex) Hermitian matrices http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf so using it will error if the matrix is Hermitian.
    When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling
$          KSPGetPC(ksp,&pc);
$          PCFactorGetMatrix(pc,&mat);
$          MatMumpsGetInfo(mat,....);
$          MatMumpsGetInfog(mat,....); etc.
    Or you can run with -ksp_error_if_not_converged and the program will be stopped and the information printed in the error message.

   Two modes to run MUMPS/PETSc with OpenMP

$     Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
$     threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".

$     -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
$     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"

   To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
   (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc
   (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
   libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
   (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).

   If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
   processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
   size m and create a communicator called omp_comm for each group.
Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 2617 are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set 2618 by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs. 2619 In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets, 2620 if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind 2621 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half 2622 cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 2623 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 2624 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 2625 examine the mapping result. 2626 2627 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts, 2628 for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc 2629 calls omp_set_num_threads(m) internally before calling MUMPS. 2630 2631 References: 2632 + 1. - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 2633 - 2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." 
   Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.

.seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCGetFactor(), PCFactorGetMatrix()

M*/

/* Reports MATSOLVERMUMPS as the solver type of any factor matrix created by this package */
static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERMUMPS;
  PetscFunctionReturn(0);
}

/* MatGetFactor for Seq and MPI AIJ matrices */
/* Builds an (unfactored) MUMPS factor matrix B for A: sets sizes, composes the MatMumps*
   accessor methods on B, selects the triple-conversion routine matching A's type and the
   requested factorization, and initializes the MUMPS instance. A itself is not modified. */
static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqAIJ;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  /* compose the MatMumps* option/query interface so the generic MatMumpsXxx() entry points find it */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype = MAT_FACTOR_LU;
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
    mumps->sym = 0; /* MUMPS SYM=0: unsymmetric */
  } else {
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
    B->factortype = MAT_FACTOR_CHOLESKY;
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
#if defined(PETSC_USE_COMPLEX)
    mumps->sym = 2; /* complex: always general symmetric (SYM=1 SPD not applicable) */
#else
    if (A->spd_set && A->spd) mumps->sym = 1; /* MUMPS SYM=1: symmetric positive definite */
    else mumps->sym = 2;                      /* MUMPS SYM=2: general symmetric */
#endif
  }

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}

/* MatGetFactor for Seq and MPI SBAIJ matrices */
/* Same construction as MatGetFactor_aij_mumps(), but SBAIJ storage only supports Cholesky */
static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqSBAIJ;

  PetscFunctionBegin;
  if (ftype != MAT_FACTOR_CHOLESKY) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Cannot use PETSc SBAIJ matrices with MUMPS LU, use AIJ matrix");
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr);
  /* Create the factorization matrix */
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
  if (isSeqSBAIJ) {
    mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
  } else {
    mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
  }

  B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
  B->ops->view                   = MatView_MUMPS;
  B->ops->getinfo                = MatGetInfo_MUMPS;

  /* compose the MatMumps* option/query interface so the generic MatMumpsXxx() entry points find it */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  B->factortype = MAT_FACTOR_CHOLESKY;
#if defined(PETSC_USE_COMPLEX)
  mumps->sym = 2; /* complex: always general symmetric */
#else
  if (A->spd_set && A->spd) mumps->sym = 1; /* SYM=1: SPD */
  else mumps->sym = 2;                      /* SYM=2: general symmetric */
#endif

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}

/* MatGetFactor for Seq and MPI BAIJ matrices: LU only (BAIJ is converted to AIJ triples) */
static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqBAIJ;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
    B->factortype = MAT_FACTOR_LU;
    if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
    mumps->sym = 0; /* SYM=0: unsymmetric */
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead\n");

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  /* compose the MatMumps* option/query interface so the generic MatMumpsXxx() entry points find it */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}

/* MatGetFactor for Seq and MPI SELL matrices */
/* Only sequential SELL with LU is implemented (converted to AIJ triples).
   NOTE(review): unlike the AIJ/BAIJ/SBAIJ factories above, no MatMumpsGetInverse_C /
   MatMumpsGetInverseTranspose_C is composed here -- confirm whether that is intentional. */
static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqSELL;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  /* compose the MatMumps* option/query interface so the generic MatMumpsXxx() entry points find it */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype = MAT_FACTOR_LU;
    if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
    else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
    mumps->sym = 0; /* SYM=0: unsymmetric */
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}

/* Registers every (matrix type, factor type) pair handled by the MUMPS package with
   PETSc's solver registry; called once when the package is loaded */
PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}