/*
    Provides an interface to the MUMPS sparse solver
*/

#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sell/mpi/mpisell.h>

EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#include <cmumps_c.h>
#else
#include <zmumps_c.h>
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#include <smumps_c.h>
#else
#include <dmumps_c.h>
#endif
#endif
EXTERN_C_END
#define JOB_INIT -1
#define JOB_FACTSYMBOLIC 1
#define JOB_FACTNUMERIC 2
#define JOB_SOLVE 3
#define JOB_END -2

/* calls to MUMPS */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c cmumps_c
#else
#define MUMPS_c zmumps_c
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c smumps_c
#else
#define MUMPS_c dmumps_c
#endif
#endif

/* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
#define PetscMUMPS_c(mumps) \
  do { \
    if (mumps->use_petsc_omp_support) { \
      if (mumps->is_omp_master) { \
        ierr = PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl);CHKERRQ(ierr); \
        MUMPS_c(&mumps->id); \
        ierr = PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl);CHKERRQ(ierr); \
      } \
      ierr = PetscOmpCtrlBarrier(mumps->omp_ctrl);CHKERRQ(ierr); \
      /* Global info is the same on all processes so we Bcast it within omp_comm. Local info is specific \
         to each process, so we only Bcast info[1], an error code, and leave the others alone (since they do not have \
         an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \
         omp_comm is a small shared-memory communicator, hence doing multiple Bcasts as shown below is OK. \
      */ \
      ierr = MPI_Bcast(mumps->id.infog, 40,MPI_INT, 0,mumps->omp_comm);CHKERRQ(ierr); \
      ierr = MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); \
      ierr = MPI_Bcast(mumps->id.info, 1, MPI_INT, 0,mumps->omp_comm);CHKERRQ(ierr); \
    } else { \
      MUMPS_c(&mumps->id); \
    } \
  } while (0)
#else
#define PetscMUMPS_c(mumps) \
  do { MUMPS_c(&mumps->id); } while (0)
#endif

/* declare MumpsScalar */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MumpsScalar mumps_complex
#else
#define MumpsScalar mumps_double_complex
#endif
#else
#define MumpsScalar PetscScalar
#endif

/* macros so that indices match the MUMPS documentation */
#define ICNTL(I) icntl[(I)-1]
#define CNTL(I) cntl[(I)-1]
#define INFOG(I) infog[(I)-1]
#define INFO(I) info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I) rinfo[(I)-1]
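/* For example, with the macros above a MUMPS parameter is read and written with the 1-based
   index used in the MUMPS manual:

     mumps->id.ICNTL(7) = 2;    which expands to    mumps->id.icntl[6] = 2;

   so the code below can be read side by side with the MUMPS documentation. */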
typedef struct {
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;
#else
  ZMUMPS_STRUC_C id;
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
#else
  DMUMPS_STRUC_C id;
#endif
#endif

  MatStructure matstruc;
  PetscMPIInt  myid,petsc_size;
  PetscInt     *irn,*jcn,nz,sym;
  PetscScalar  *val;
  MPI_Comm     mumps_comm;
  PetscInt     ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter   scat_rhs, scat_sol; /* used by MatSolve() */
  Vec          b_seq,x_seq;
  PetscInt     ninfo,*info;        /* display INFO */
  PetscInt     sizeredrhs;
  PetscScalar  *schur_sol;
  PetscInt     schur_sizesol;

  PetscBool    use_petsc_omp_support;
  PetscOmpCtrl omp_ctrl;            /* an OpenMP controller with which blocked processes release their CPU (MPI_Barrier does not have this guarantee) */
  MPI_Comm     petsc_comm,omp_comm; /* petsc_comm is the PETSc matrix's comm */
  PetscMPIInt  mpinz;               /* on the master rank, nz = sum(mpinz) over omp_comm; on other ranks, mpinz = nz */
  PetscMPIInt  omp_comm_size;
  PetscBool    is_omp_master;       /* is this rank the master of omp_comm */
  PetscMPIInt  *recvcount,*displs;

  PetscErrorCode (*ConvertToTriples)(Mat, int, MatReuse, int*, int**, int**, PetscScalar**);
} Mat_MUMPS;

extern PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*);

static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr);
  ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
  ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
  mumps->id.size_schur = 0;
  mumps->id.schur_lld  = 0;
  mumps->id.ICNTL(19)  = 0;
  PetscFunctionReturn(0);
}
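/* A sketch of the Schur workflow implemented by the two helpers below, as we understand the
   MUMPS reduced right-hand side feature (see ICNTL(26) in the MUMPS manual):

     mumps->id.ICNTL(26) = 1;   condensation: the solve phase leaves the reduced rhs
                                for the Schur unknowns in mumps->id.redrhs
     ... solve the Schur system ourselves, overwriting mumps->id.redrhs ...
     mumps->id.ICNTL(26) = 2;   expansion: a second solve phase expands the Schur
                                solution back into the full solution vector
*/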
/* solve with rhs in mumps->id.redrhs and return in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS            *mumps=(Mat_MUMPS*)F->data;
  Mat                  S,B,X;
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;
  PetscErrorCode       ierr;

  PetscFunctionBegin;
  ierr = MatFactorFactorizeSchurComplement(F);CHKERRQ(ierr);
  ierr = MatFactorGetSchurComplement(F,&S,&schurstatus);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  ierr = MatBindToCPU(B,S->boundtocpu);CHKERRQ(ierr);
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);CHKERRQ(ierr);
    ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr);
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatMatSolveTranspose(S,B,X);CHKERRQ(ierr);
    } else {
      ierr = MatMatSolve(S,B,X);CHKERRQ(ierr);
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    sizesol = mumps->id.nrhs*mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
      ierr = PetscMalloc1(sizesol,&mumps->schur_sol);CHKERRQ(ierr);
      mumps->schur_sizesol = sizesol;
    }
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);CHKERRQ(ierr);
    ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr);
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatTransposeMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr);
    } else {
      ierr = MatMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr);
    }
    ierr = MatCopy(X,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %D",F->schur_status);
    break;
  }
  ierr = MatFactorRestoreSchurComplement(F,&S,schurstatus);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = MatDestroy(&X);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)F->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when the Schur complement has not been computed */
    PetscFunctionReturn(0);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = mumps->id.size_schur;
      ierr = PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);CHKERRQ(ierr);
      mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs;
    }
    mumps->id.ICNTL(26) = 1; /* condensation phase */
  } else { /* prepare for the expansion step */
    /* solve the Schur complement (this has to be done by the MUMPS user, so basically us) */
    ierr = MatMumpsSolveSchur_Private(F);CHKERRQ(ierr);
    mumps->id.ICNTL(26) = 2; /* expansion phase */
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free the MUMPS internal array for redrhs if we have solved for multiple rhs, in order to save memory */
    if (mumps->id.nrhs > 1) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = 0;
      mumps->sizeredrhs = 0;
    }
  }
  PetscFunctionReturn(0);
}

/*
  MatConvertToTriples_A_B - convert a PETSc matrix to triples: row[nz], col[nz], val[nz]

  input:
    A     - matrix in aij, baij or sbaij format
    shift - 0: C style output triples; 1: Fortran style output triples.
    reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triples
            MAT_REUSE_MATRIX:   only the values in the v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and col indices, matrix values (matrix triples)

  The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() they are
  freed with PetscFree(mumps->irn). This is not ideal code: the fact that v is only sometimes part of mumps->irn means
  that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
*/
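/* A small worked example of the triples format (an illustration, not code from MUMPS itself):
   with shift=1 (Fortran style), the 2x2 sequential AIJ matrix

        [ 2  0 ]
        [ 3  4 ]

   is converted to nnz = 3 and the arrays

        r = {1, 2, 2}   c = {1, 1, 2}   v = {2.0, 3.0, 4.0}

   i.e. one (row, col, value) triple per stored nonzero, in row-major order. */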
PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscScalar *av;
  const PetscInt    *ai,*aj,*ajj,M=A->rmap->n;
  PetscInt          nz,rnz,i,j;
  PetscErrorCode    ierr;
  PetscInt          *row,*col;
  Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;

  PetscFunctionBegin;
  ierr = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr);
  *v = (PetscScalar*)av;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz;
    ai   = aa->i;
    aj   = aa->j;
    *nnz = nz;
    ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr);
    col  = row + nz;

    nz = 0;
    for (i=0; i<M; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        row[nz] = i+shift; col[nz++] = ajj[j] + shift;
      }
    }
    *r = row; *c = col;
  }
  ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
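/* The SELL conversion below relies on the SeqSELL slice height of 8 rows, as its use of
   (row+1)&0x07 and 8*i indicates: slice i starts at global row 8*i, and a->sliidx[i] is the
   offset of slice i in the colidx/val arrays. */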
PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
  PetscInt    *ptr;

  PetscFunctionBegin;
  *v = a->val;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscInt       nz,i,j,row;
    PetscErrorCode ierr;

    nz   = a->sliidx[a->totalslices];
    *nnz = nz;
    ierr = PetscMalloc1(2*nz, &ptr);CHKERRQ(ierr);
    *r   = ptr;
    *c   = ptr + nz;

    for (i=0; i<a->totalslices; i++) {
      for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
        *ptr++ = 8*i + row + shift;
      }
    }
    for (i=0;i<nz;i++) *ptr++ = a->colidx[i] + shift;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  Mat_SeqBAIJ    *aa=(Mat_SeqBAIJ*)A->data;
  const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2;
  PetscInt       bs,M,nz,idx=0,rnz,i,j,k,m;
  PetscErrorCode ierr;
  PetscInt       *row,*col;

  PetscFunctionBegin;
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  M    = A->rmap->N/bs;
  *v   = aa->a;
  if (reuse == MAT_INITIAL_MATRIX) {
    ai   = aa->i; aj = aa->j;
    nz   = bs2*aa->nz;
    *nnz = nz;
    ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr);
    col  = row + nz;

    for (i=0; i<M; i++) {
      ajj = aj + ai[i];
      rnz = ai[i+1] - ai[i];
      for (k=0; k<rnz; k++) {
        for (j=0; j<bs; j++) {
          for (m=0; m<bs; m++) {
            row[idx]   = i*bs + m + shift;
            col[idx++] = bs*(ajj[k]) + j + shift;
          }
        }
      }
    }
    *r = row; *c = col;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r,int **c,PetscScalar **v)
{
  const PetscInt *ai, *aj,*ajj;
  PetscInt       nz,rnz,i,j,k,m,bs;
  PetscErrorCode ierr;
  PetscInt       *row,*col;
  PetscScalar    *val;
  Mat_SeqSBAIJ   *aa=(Mat_SeqSBAIJ*)A->data;
  const PetscInt bs2=aa->bs2,mbs=aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool      hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
  if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  ai   = aa->i;
  aj   = aa->j;
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz;
    ierr = PetscMalloc((2*bs2*nz*sizeof(PetscInt)+(bs>1?bs2*nz*sizeof(PetscScalar):0)), &row);CHKERRQ(ierr);
    col  = row + bs2*nz;
    if (bs>1) val = (PetscScalar*)(col + bs2*nz);
    else val = aa->a;

    *r = row; *c = col; *v = val;
  } else {
    if (bs == 1) *v = aa->a;
    row = *r; col = *c; val = *v;
  }

  nz = 0;
  if (bs>1) {
    for (i=0; i<mbs; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            if (ajj[j]>i || k>=m) { /* keep whole blocks above the diagonal, and the upper triangle of diagonal blocks */
              if (reuse == MAT_INITIAL_MATRIX) {
                row[nz] = i*bs + m + shift;
                col[nz] = ajj[j]*bs + k + shift;
              }
              val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs];
            }
          }
        }
      }
    }
  } else if (reuse == MAT_INITIAL_MATRIX) {
    for (i=0; i<mbs; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        row[nz] = i+shift; col[nz++] = ajj[j] + shift;
      }
    }
    if (nz != aa->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %D != %D",nz,aa->nz);
  }
  if (reuse == MAT_INITIAL_MATRIX) *nnz = nz;
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscInt    *ai,*aj,*ajj,*adiag,M=A->rmap->n;
  PetscInt          nz,rnz,i,j;
  const PetscScalar *av,*v1;
  PetscScalar       *val;
  PetscErrorCode    ierr;
  PetscInt          *row,*col;
  Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
  PetscBool         missing;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
  if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  ierr  = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr);
  ai    = aa->i; aj = aa->j;
  adiag = aa->diag;
  ierr  = MatMissingDiagonal_SeqAIJ(A,&missing,&i);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    /* count nz in the upper triangular part of A */
    nz = 0;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) { /* row i has no diagonal entry */
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            nz++;
          }
        } else {
          nz += ai[i+1] - adiag[i];
        }
      }
    } else {
      for (i=0; i<M; i++) nz += ai[i+1] - adiag[i];
    }
    *nnz = nz;

    ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr);
    col  = row + nz;
    val  = (PetscScalar*)(col + nz);

    nz = 0;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            row[nz] = i+shift;
            col[nz] = aj[j]+shift;
            val[nz] = av[j];
            nz++;
          }
        } else {
          rnz = ai[i+1] - adiag[i];
          ajj = aj + adiag[i];
          v1  = av + adiag[i];
          for (j=0; j<rnz; j++) {
            row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i=0; i<M; i++) {
        rnz = ai[i+1] - adiag[i];
        ajj = aj + adiag[i];
        v1  = av + adiag[i];
        for (j=0; j<rnz; j++) {
          row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j];
        }
      }
    }
    *r = row; *c = col; *v = val;
  } else {
    nz = 0; val = *v;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            val[nz++] = av[j];
          }
        } else {
          rnz = ai[i+1] - adiag[i];
          v1  = av + adiag[i];
          for (j=0; j<rnz; j++) {
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i=0; i<M; i++) {
        rnz = ai[i+1] - adiag[i];
        v1  = av + adiag[i];
        for (j=0; j<rnz; j++) {
          val[nz++] = v1[j];
        }
      }
    }
  }
  ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
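/* Note on the *_seqsbaij converters above: for a symmetric (Cholesky) factorization MUMPS
   expects only one triangle of the matrix, so these routines emit just the upper-triangular
   entries (col >= row) rather than the full pattern. */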
PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r,int **c, PetscScalar **v)
{
  const PetscInt    *ai,*aj,*bi,*bj,*garray,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt          rstart,nz,bs,i,j,k,m,jj,irow,countA,countB;
  PetscInt          *row,*col;
  const PetscScalar *av,*bv,*v1,*v2;
  PetscScalar       *val;
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ*)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
  const PetscInt    bs2=aa->bs2,mbs=aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
  if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  ierr   = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  rstart = A->rmap->rstart;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;
  av = aa->a;
  bv = bb->a;

  garray = mat->garray;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz + bb->nz;
    ierr = PetscMalloc((2*bs2*nz*sizeof(PetscInt)+bs2*nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr);
    col  = row + bs2*nz;
    val  = (PetscScalar*)(col + bs2*nz);

    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }

  jj = 0; irow = rstart;
  for (i=0; i<mbs; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i]*bs2;
    v2     = bv + bi[i]*bs2;

    if (bs>1) {
      /* A-part */
      for (j=0; j<countA; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            if (rstart + ajj[j]*bs>irow || k>=m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                row[jj] = irow + m + shift; col[jj] = rstart + ajj[j]*bs + k + shift;
              }
              val[jj++] = v1[j*bs2 + m + k*bs];
            }
          }
        }
      }

      /* B-part */
      for (j=0; j < countB; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            if (reuse == MAT_INITIAL_MATRIX) {
              row[jj] = irow + m + shift; col[jj] = garray[bjj[j]]*bs + k + shift;
            }
            val[jj++] = v2[j*bs2 + m + k*bs];
          }
        }
      }
    } else {
      /* A-part */
      for (j=0; j<countA; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
        }
        val[jj++] = v1[j];
      }

      /* B-part */
      for (j=0; j < countB; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
        }
        val[jj++] = v2[j];
      }
    }
    irow+=bs;
  }
  *nnz = jj;
  PetscFunctionReturn(0);
}
PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt          rstart,nz,i,j,jj,irow,countA,countB;
  PetscInt          *row,*col;
  const PetscScalar *av, *bv,*v1,*v2;
  PetscScalar       *val;
  Mat               Ad,Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;

  PetscFunctionBegin;
  ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr);

  aa = (Mat_SeqAIJ*)(Ad)->data;
  bb = (Mat_SeqAIJ*)(Ao)->data;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz + bb->nz;
    *nnz = nz;
    ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr);
    col  = row + nz;
    val  = (PetscScalar*)(col + nz);

    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j=0; j < countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
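/* In the MPI converters, Ad (mat->A) holds the square "diagonal" block whose columns this
   rank owns, with column indices local to rstart, while Ao (mat->B) holds the off-diagonal
   block with compressed local column indices; garray[] maps those compressed indices back
   to global columns. That is why the A-part computes rstart + ajj[j] while the B-part uses
   garray[bjj[j]]. */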
PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ*)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
  const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj;
  const PetscInt    *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart;
  const PetscInt    bs2=mat->bs2;
  PetscErrorCode    ierr;
  PetscInt          bs,nz,i,j,k,n,jj,irow,countA,countB,idx;
  PetscInt          *row,*col;
  const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2;
  PetscScalar       *val;

  PetscFunctionBegin;
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = bs2*(aa->nz + bb->nz);
    *nnz = nz;
    ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr);
    col  = row + nz;
    val  = (PetscScalar*)(col + nz);

    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }

  jj = 0; irow = rstart;
  for (i=0; i<mbs; i++) {
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    ajj    = aj + ai[i];
    bjj    = bj + bi[i];
    v1     = av + bs2*ai[i];
    v2     = bv + bs2*bi[i];

    idx = 0;
    /* A-part */
    for (k=0; k<countA; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            row[jj] = irow + n + shift;
            col[jj] = rstart + bs*ajj[k] + j + shift;
          }
          val[jj++] = v1[idx++];
        }
      }
    }

    idx = 0;
    /* B-part */
    for (k=0; k<countB; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            row[jj] = irow + n + shift;
            col[jj] = bs*garray[bjj[k]] + j + shift;
          }
          val[jj++] = v2[idx++];
        }
      }
    }
    irow += bs;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscInt    *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt          rstart,nz,nza,nzb,i,j,jj,irow,countA,countB;
  PetscInt          *row,*col;
  const PetscScalar *av, *bv,*v1,*v2;
  PetscScalar       *val;
  Mat               Ad,Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
  if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr);

  aa    = (Mat_SeqAIJ*)(Ad)->data;
  bb    = (Mat_SeqAIJ*)(Ao)->data;
  ai    = aa->i;
  aj    = aa->j;
  adiag = aa->diag;
  bi    = bb->i;
  bj    = bb->j;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
    nzb = 0; /* num of upper triangular entries in mat->B */
    for (i=0; i<m; i++) {
      nza   += (ai[i+1] - adiag[i]);
      countB = bi[i+1] - bi[i];
      bjj    = bj + bi[i];
      for (j=0; j<countB; j++) {
        /* B holds only columns outside the local diagonal block, so a global column > rstart lies to the right of the diagonal */
        if (garray[bjj[j]] > rstart) nzb++;
      }
    }

    nz   = nza + nzb; /* total nz of upper triangular part of mat */
    *nnz = nz;
    ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr);
    col  = row + nz;
    val  = (PetscScalar*)(col + nz);

    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
    v1     = av + adiag[i];
    countA = ai[i+1] - adiag[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j=0; j < countB; j++) {
      if (garray[bjj[j]] > rstart) {
        if (reuse == MAT_INITIAL_MATRIX) {
          row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
        }
        val[jj++] = v2[j];
      }
    }
    irow++;
  }
  ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
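/* A hedged sketch of how these converters are wired up (the actual assignment happens in the
   factor-creation routines, outside this excerpt): each factor stores the converter that
   matches its matrix type and factorization in the function pointer, e.g.

     mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;     LU of MPIAIJ
     mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;   Cholesky of MPIAIJ

   and the symbolic/numeric phases then invoke it through (*mumps->ConvertToTriples)(...). */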
PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
  ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr);
  ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr);
  ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr);
  ierr = PetscFree(mumps->irn);CHKERRQ(ierr);
  ierr = PetscFree(mumps->info);CHKERRQ(ierr);
  ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr);
  mumps->id.job = JOB_END;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1));
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  if (mumps->use_petsc_omp_support) { ierr = PetscOmpCtrlDestroy(&mumps->omp_ctrl);CHKERRQ(ierr); }
#endif
  ierr = PetscFree2(mumps->recvcount,mumps->displs);CHKERRQ(ierr);
  ierr = PetscFree(A->data);CHKERRQ(ierr);

  /* clear composed functions */
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS        *mumps=(Mat_MUMPS*)A->data;
  PetscScalar      *array;
  Vec              b_seq;
  IS               is_iden,is_petsc;
  PetscErrorCode   ierr;
  PetscInt         i;
  PetscBool        second_solve = PETSC_FALSE;
  static PetscBool cite1 = PETSC_FALSE,cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1);CHKERRQ(ierr);
  ierr = PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",&cite2);CHKERRQ(ierr);

  if (A->factorerrortype) {
    ierr = PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    ierr = VecSetInf(x);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  mumps->id.ICNTL(20) = 0; /* dense RHS */
  mumps->id.nrhs      = 1;
  b_seq               = mumps->b_seq;
  if (mumps->petsc_size > 1) {
    /* MUMPS only supports a centralized rhs. Scatter b into a sequential rhs vector */
    ierr = VecScatterBegin(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mumps->myid) {ierr = VecGetArray(b_seq,&array);CHKERRQ(ierr);}
  } else {  /* petsc_size == 1 */
    ierr = VecCopy(b,x);CHKERRQ(ierr);
    ierr = VecGetArray(x,&array);CHKERRQ(ierr);
  }
  if (!mumps->myid) { /* define rhs on the host */
    mumps->id.nrhs = 1;
    mumps->id.rhs  = (MumpsScalar*)array;
  }

  /*
     handle condensation step of Schur complement (if any)
     We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
     According to the MUMPS (5.0.0) manual, any value should be harmless during the factorization phase.
     Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
     This requires an extra call to PetscMUMPS_c and the computation of the factors for S.
  */
  if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
    if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n");
    second_solve = PETSC_TRUE;
    ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
  }
  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

  /* handle expansion step of Schur complement (if any) */
  if (second_solve) {
    ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
  }

  if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
    if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
      /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), so recreate scat_sol */
      ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
    }
    if (!mumps->scat_sol) { /* create scatter scat_sol */
      ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */
      for (i=0; i<mumps->id.lsol_loc; i++) {
        mumps->id.isol_loc[i] -= 1; /* change Fortran style to C style */
      }
      ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,mumps->id.isol_loc,PETSC_COPY_VALUES,&is_petsc);CHKERRQ(ierr); /* to */
      ierr = VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr);
      ierr = ISDestroy(&is_iden);CHKERRQ(ierr);
      ierr = ISDestroy(&is_petsc);CHKERRQ(ierr);

      mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
    }

    ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }

  if (mumps->petsc_size > 1) {if (!mumps->myid) {ierr = VecRestoreArray(b_seq,&array);CHKERRQ(ierr);}}
  else {ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);}

  ierr = PetscLogFlops(2.0*mumps->id.RINFO(3));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mumps->id.ICNTL(9) = 0; /* ICNTL(9)=0: solve with A^T */
  ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr);
  mumps->id.ICNTL(9) = 1; /* restore the MUMPS default: solve with A */
  PetscFunctionReturn(0);
}
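/* A hedged usage sketch for the transpose solve above (hypothetical caller code, assuming
   a factor F obtained with MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F) and already
   factored):

     ierr = MatSolveTranspose(F,b,x);CHKERRQ(ierr);    solves A^T x = b

   PETSc dispatches this to MatSolveTranspose_MUMPS, which simply toggles ICNTL(9) around
   a regular MatSolve_MUMPS call. */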
PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X)
{
  PetscErrorCode    ierr;
  Mat               Bt = NULL;
  PetscBool         flg, flgT;
  Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
  PetscInt          i,nrhs,M;
  PetscScalar       *array;
  const PetscScalar *rbray;
  PetscInt          lsol_loc,nlsol_loc,*isol_loc,*idxx,*isol_loc_save,iidx = 0;
  PetscScalar       *bray,*sol_loc,*sol_loc_save;
  IS                is_to,is_from;
  PetscInt          k,proc,j,m,myrstart;
  const PetscInt    *rstart;
  Vec               v_mpi,b_seq,msol_loc;
  VecScatter        scat_rhs,scat_sol;
  PetscScalar       *aa;
  PetscInt          spnr,*ia,*ja;
  Mat_MPIAIJ        *b = NULL;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");

  ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (flg) { /* dense B */
    if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution");
    mumps->id.ICNTL(20) = 0; /* dense RHS */
  } else { /* sparse B */
    if (X == B) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_IDN,"X and B must be different matrices");
    ierr = PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);CHKERRQ(ierr);
    if (flgT) { /* input B is the transpose of the actual RHS matrix,
                   because MUMPS requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
      ierr = MatTransposeGetMat(B,&Bt);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix");
    mumps->id.ICNTL(20) = 1; /* sparse RHS */
  }

  ierr = MatGetSize(B,&M,&nrhs);CHKERRQ(ierr);
  mumps->id.nrhs = nrhs;
  mumps->id.lrhs = M;
  mumps->id.rhs  = NULL;

  if (mumps->petsc_size == 1) {
    PetscScalar *aa;
    PetscInt    spnr,*ia,*ja;
    PetscBool   second_solve = PETSC_FALSE;

    ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
    mumps->id.rhs = (MumpsScalar*)array;

    if (!Bt) { /* dense B */
      /* copy B to X */
      ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr);
      ierr = PetscArraycpy(array,rbray,M*nrhs);CHKERRQ(ierr);
      ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr);
    } else { /* sparse B */
      ierr = MatSeqAIJGetArray(Bt,&aa);CHKERRQ(ierr);
      ierr = MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      /* MUMPS requires ia and ja to start at 1! */
      mumps->id.irhs_ptr    = ia;
      mumps->id.irhs_sparse = ja;
      mumps->id.nz_rhs      = ia[spnr] - 1;
      mumps->id.rhs_sparse  = (MumpsScalar*)aa;
    }
    /* handle condensation step of Schur complement (if any) */
    if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
      second_solve = PETSC_TRUE;
      ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
    }
    /* solve phase */
    /*-------------*/
    mumps->id.job = JOB_SOLVE;
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

    /* handle expansion step of Schur complement (if any) */
    if (second_solve) {
      ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
    }
    if (Bt) { /* sparse B */
      ierr = MatSeqAIJRestoreArray(Bt,&aa);CHKERRQ(ierr);
      ierr = MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
    ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/
  if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n");

  /* create msol_loc to hold the mumps local solution */
  isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
  sol_loc_save  = (PetscScalar*)mumps->id.sol_loc;

  lsol_loc  = mumps->id.lsol_loc;
  nlsol_loc = nrhs*lsol_loc; /* length of sol_loc */
  ierr = PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);CHKERRQ(ierr);
  mumps->id.sol_loc  = (MumpsScalar*)sol_loc;
  mumps->id.isol_loc = isol_loc;

  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);CHKERRQ(ierr);

  if (!Bt) { /* dense B */
    /* TODO: Because of the non-contiguous indices, the vecscatter scat_rhs created below does not reduce to an
       MPI_Gather, resulting in very inefficient communication. An optimization is to use VecScatterCreateToZero
       to gather B to rank 0. Then on rank 0, re-arrange B into the desired order, which is a local operation.
     */

    /* scatter v_mpi to b_seq because MUMPS only supports a centralized rhs */
    /* wrap dense rhs matrix B into a vector v_mpi */
    ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr);
    ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr);
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr);

    /* scatter v_mpi to b_seq on proc[0]. MUMPS requires the rhs to be centralized on the host! */
    if (!mumps->myid) {
      PetscInt *idx;
      /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
      ierr = PetscMalloc1(nrhs*M,&idx);CHKERRQ(ierr);
      ierr = MatGetOwnershipRanges(B,&rstart);CHKERRQ(ierr);
      k = 0;
      for (proc=0; proc<mumps->petsc_size; proc++){
        for (j=0; j<nrhs; j++){
          for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i;
        }
      }

      ierr = VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);CHKERRQ(ierr);
      ierr = ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);CHKERRQ(ierr);
    } else {
      ierr = VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);CHKERRQ(ierr);
    }
    ierr = VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);CHKERRQ(ierr);
    ierr = VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = ISDestroy(&is_to);CHKERRQ(ierr);
    ierr = ISDestroy(&is_from);CHKERRQ(ierr);
    ierr = VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

    if (!mumps->myid) { /* define rhs on the host */
      ierr = VecGetArray(b_seq,&bray);CHKERRQ(ierr);
      mumps->id.rhs = (MumpsScalar*)bray;
      ierr = VecRestoreArray(b_seq,&bray);CHKERRQ(ierr);
    }

  } else { /* sparse B */
    b = (Mat_MPIAIJ*)Bt->data;

    /* wrap dense X into a vector v_mpi */
    ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr);
    ierr = MatDenseGetArray(X,&bray);CHKERRQ(ierr);
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(X,&bray);CHKERRQ(ierr);

    if (!mumps->myid) {
      ierr = MatSeqAIJGetArray(b->A,&aa);CHKERRQ(ierr);
      ierr = MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      /* MUMPS requires ia and ja to start at 1! */
      mumps->id.irhs_ptr    = ia;
      mumps->id.irhs_sparse = ja;
      mumps->id.nz_rhs      = ia[spnr] - 1;
      mumps->id.rhs_sparse  = (MumpsScalar*)aa;
    } else {
      mumps->id.irhs_ptr    = NULL;
      mumps->id.irhs_sparse = NULL;
      mumps->id.nz_rhs      = 0;
      mumps->id.rhs_sparse  = NULL;
    }
  }
  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

  /* scatter the mumps distributed solution to the petsc vector v_mpi, which shares local arrays with the solution matrix X */
  ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
  ierr = VecPlaceArray(v_mpi,array);CHKERRQ(ierr);

  /* create scatter scat_sol */
  ierr = MatGetOwnershipRanges(X,&rstart);CHKERRQ(ierr);
  /* iidx: index for scattering the mumps solution to petsc X */

  ierr = ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);CHKERRQ(ierr);
  ierr = PetscMalloc1(nlsol_loc,&idxx);CHKERRQ(ierr);
  for (i=0; i<lsol_loc; i++) {
    isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in the j-th vector */

    for (proc=0; proc<mumps->petsc_size; proc++){
      if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) {
        myrstart = rstart[proc];
        k        = isol_loc[i] - myrstart;        /* local index on 1st column of petsc vector X */
        iidx     = k + myrstart*nrhs;             /* maps mumps isol_loc[i] to petsc index in X */
        m        = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */
        break;
      }
    }

    for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m;
  }
  ierr = ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr);
  ierr = VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);CHKERRQ(ierr);
  ierr = VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = ISDestroy(&is_from);CHKERRQ(ierr);
  ierr = ISDestroy(&is_to);CHKERRQ(ierr);
  ierr = VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);

  /* free spaces */
  mumps->id.sol_loc  = (MumpsScalar*)sol_loc_save;
  mumps->id.isol_loc = isol_loc_save;

  ierr = PetscFree2(sol_loc,isol_loc);CHKERRQ(ierr);
  ierr = PetscFree(idxx);CHKERRQ(ierr);
  ierr = VecDestroy(&msol_loc);CHKERRQ(ierr);
  ierr = VecDestroy(&v_mpi);CHKERRQ(ierr);
  if (Bt) {
    if (!mumps->myid) {
      b = (Mat_MPIAIJ*)Bt->data;
      ierr = MatSeqAIJRestoreArray(b->A,&aa);CHKERRQ(ierr);
      ierr = MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
  } else {
    ierr = VecDestroy(&b_seq);CHKERRQ(ierr);
    ierr = VecScatterDestroy(&scat_rhs);CHKERRQ(ierr);
  }
  ierr = VecScatterDestroy(&scat_sol);CHKERRQ(ierr);
  ierr = PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X)
{
  PetscErrorCode ierr;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix");

  /* Create B=Bt^T that uses Bt's data structure */
  ierr = MatCreateTranspose(Bt,&B);CHKERRQ(ierr);

  ierr = MatMatSolve_MUMPS(A,B,X);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
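/* A hedged usage sketch for the sparse right-hand-side path (hypothetical caller code): to
   solve A X = B with a sparse B, the caller supplies B^T in AIJ format, since MUMPS wants
   compressed-column storage while PETSc AIJ is compressed-row:

     Mat Bt;                                           B^T stored as (MPI)AIJ
     ierr = MatMatTransposeSolve(F,Bt,X);CHKERRQ(ierr);

   which wraps Bt in a MATTRANSPOSEMAT and forwards it to MatMatSolve_MUMPS above. */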
#if !defined(PETSC_USE_COMPLEX)
/*
  input:
   F:     numeric factor
  output:
   nneg:  total number of negative pivots
   nzero: total number of zero pivots
   npos:  (global dimension of F) - nneg - nzero
*/
PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos)
{
  Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRQ(ierr);
  /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
  if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",mumps->id.ICNTL(13));

  if (nneg) *nneg = mumps->id.INFOG(12);
  if (nzero || npos) {
    if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
    if (nzero) *nzero = mumps->id.INFOG(28);
    if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
  }
  PetscFunctionReturn(0);
}
#endif
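/* A hedged usage sketch (hypothetical caller code): with a Cholesky factor F of a real
   symmetric matrix, run with -mat_mumps_icntl_13 1 -mat_mumps_icntl_24 1,

     PetscInt nneg,nzero,npos;
     ierr = MatGetInertia(F,&nneg,&nzero,&npos);CHKERRQ(ierr);

   reports the inertia from INFOG(12)/INFOG(28), as used by eigenvalue-slicing codes. */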
PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps)
{
  PetscErrorCode ierr;
  PetscInt       i,nz=0,*irn,*jcn=0;
  PetscScalar    *val=0;
  PetscMPIInt    mpinz,*recvcount=NULL,*displs=NULL;

  PetscFunctionBegin;
  if (mumps->omp_comm_size > 1) {
    if (reuse == MAT_INITIAL_MATRIX) {
      /* master first gathers counts of nonzeros to receive */
      if (mumps->is_omp_master) { ierr = PetscMalloc2(mumps->omp_comm_size,&recvcount,mumps->omp_comm_size,&displs);CHKERRQ(ierr); }
      ierr = PetscMPIIntCast(mumps->nz,&mpinz);CHKERRQ(ierr);
      ierr = MPI_Gather(&mpinz,1,MPI_INT,recvcount,1,MPI_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr);

      /* master allocates memory to receive nonzeros */
      if (mumps->is_omp_master) {
        displs[0] = 0;
        for (i=1; i<mumps->omp_comm_size; i++) displs[i] = displs[i-1] + recvcount[i-1];
        nz   = displs[mumps->omp_comm_size-1] + recvcount[mumps->omp_comm_size-1];
        ierr = PetscMalloc(2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar),&irn);CHKERRQ(ierr);
        jcn  = irn + nz;
        val  = (PetscScalar*)(jcn + nz);
      }

      /* save the gatherv plan */
      mumps->mpinz     = mpinz; /* used as send count */
      mumps->recvcount = recvcount;
      mumps->displs    = displs;

      /* master gathers nonzeros */
      ierr = MPI_Gatherv(mumps->irn,mpinz,MPIU_INT,irn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr);
      ierr = MPI_Gatherv(mumps->jcn,mpinz,MPIU_INT,jcn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr);
      ierr = MPI_Gatherv(mumps->val,mpinz,MPIU_SCALAR,val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr);

      /* master frees its row/col/val and replaces them with the bigger arrays */
      if (mumps->is_omp_master) {
        ierr = PetscFree(mumps->irn);CHKERRQ(ierr); /* irn/jcn/val are allocated together so free only irn */
        mumps->nz  = nz; /* it is the sum of mpinz over omp_comm */
        mumps->irn = irn;
        mumps->jcn = jcn;
        mumps->val = val;
      }
    } else {
      ierr = MPI_Gatherv((mumps->is_omp_master?MPI_IN_PLACE:mumps->val),mumps->mpinz,MPIU_SCALAR,mumps->val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}
PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat_MUMPS      *mumps =(Mat_MUMPS*)(F)->data;
  PetscErrorCode ierr;
  PetscBool      isMPIAIJ;

  PetscFunctionBegin;
  if (mumps->id.INFOG(1) < 0) { /* the analysis phase failed; do not attempt numeric factorization */
    if (mumps->id.INFOG(1) == -6) {
      ierr = PetscInfo2(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    }
    ierr = PetscInfo2(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr);
  ierr = MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);CHKERRQ(ierr);

  /* numerical factorization phase */
  /*-------------------------------*/
  mumps->id.job = JOB_FACTNUMERIC;
  if (!mumps->id.ICNTL(18)) { /* A is centralized */
    if (!mumps->myid) {
      mumps->id.a = (MumpsScalar*)mumps->val;
    }
  } else {
    mumps->id.a_loc = (MumpsScalar*)mumps->val;
  }
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) {
    if (A->erroriffailure) {
      SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
    } else {
      if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
        ierr = PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
      } else if (mumps->id.INFOG(1) == -13) {
        ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_OUTMEMORY;
      } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
        ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with work array\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_OUTMEMORY;
      } else {
        ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_OTHER;
      }
    }
  }
  if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d\n",mumps->id.ICNTL(16));

  F->assembled    = PETSC_TRUE;
  mumps->matstruc = SAME_NONZERO_PATTERN;
  if (F->schur) { /* reset Schur status to unfactored */
#if defined(PETSC_HAVE_CUDA)
    F->schur->offloadmask = PETSC_OFFLOAD_CPU;
#endif
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
      mumps->id.ICNTL(19) = 2;
      ierr = MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);CHKERRQ(ierr);
    }
    ierr = MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);CHKERRQ(ierr);
  }

  /* just to be sure that the ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
  if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;

  if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
  if (mumps->petsc_size > 1) {
    PetscInt    lsol_loc;
    PetscScalar *sol_loc;

    ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr);

    /* distributed solution; create x_seq=sol_loc for repeated use */
    if (mumps->x_seq) {
      ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
      ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr);
      ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr);
    }
    lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
    ierr = PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);CHKERRQ(ierr);
    mumps->id.lsol_loc = lsol_loc;
    mumps->id.sol_loc  = (MumpsScalar*)sol_loc;
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr);
  }
  ierr = PetscLogFlops(mumps->id.RINFO(2));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
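/* For orientation, a hedged sketch of the MUMPS job sequence this interface drives, using
   the JOB_* codes defined at the top of this file:

     mumps->id.job = JOB_INIT;          once, in PetscInitializeMUMPS()
     mumps->id.job = JOB_FACTSYMBOLIC;  analysis, in the symbolic factorization routine
     mumps->id.job = JOB_FACTNUMERIC;   numeric factorization, above
     mumps->id.job = JOB_SOLVE;         in MatSolve_MUMPS()/MatMatSolve_MUMPS()
     mumps->id.job = JOB_END;           once, in MatDestroy_MUMPS()

   each followed by PetscMUMPS_c(mumps) and a check of INFOG(1). */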
/* Sets MUMPS options from the options database */
PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A)
{
  Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
  PetscErrorCode ierr;
  PetscInt       icntl,info[80],i,ninfo=80;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr);
  if (flg) mumps->id.ICNTL(1) = icntl;
  ierr = PetscOptionsInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warnings","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr);
  if (flg) mumps->id.ICNTL(2) = icntl;
  ierr = PetscOptionsInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr);
  if (flg) mumps->id.ICNTL(3) = icntl;

  ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr);
  if (flg) mumps->id.ICNTL(4) = icntl;
  if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume the MUMPS default id.ICNTL(3) = 6 */

  ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr);
  if (flg) mumps->id.ICNTL(6) = icntl;
  ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr);
  if (flg) {
    if (icntl== 1 && mumps->petsc_size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"pivot order cannot be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n");
    else mumps->id.ICNTL(7) = icntl;
  }

  ierr = PetscOptionsInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr);
  /* ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL);CHKERRQ(ierr);  handled by MatSolveTranspose_MUMPS() */
  ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr);
  if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any Schur data (if any) */
    ierr = MatDestroy(&F->schur);CHKERRQ(ierr);
    ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr);
  }
  /* ierr = PetscOptionsInt("-mat_mumps_icntl_20","ICNTL(20): the format (dense or sparse) of the right-hand sides","None",mumps->id.ICNTL(20),&mumps->id.ICNTL(20),NULL);CHKERRQ(ierr);  -- sparse rhs is not supported in the PETSc API */
  /* ierr = PetscOptionsInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL);CHKERRQ(ierr);  we only use distributed solution vectors */

  ierr = PetscOptionsInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can be allocated per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr);
  if (mumps->id.ICNTL(24)) {
    mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */
  }

  ierr = PetscOptionsInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr);
matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr); 1487 ierr = PetscOptionsInt("-mat_mumps_icntl_27","ICNTL(27): the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr); 1488 ierr = PetscOptionsInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr); 1489 ierr = PetscOptionsInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr); 1490 /* ierr = PetscOptionsInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */ 1491 ierr = PetscOptionsInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr); 1492 /* ierr = PetscOptionsInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr); -- not supported by PETSc API */ 1493 ierr = PetscOptionsInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr); 1494 ierr = PetscOptionsInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr); 1495 ierr = PetscOptionsInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);CHKERRQ(ierr); 1496 ierr = PetscOptionsInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);CHKERRQ(ierr); 1497 1498 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr); 1499 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr); 1500 ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr); 1501 ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr); 1502 ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr); 1503 ierr = PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);CHKERRQ(ierr); 1504 1505 ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, 256, NULL);CHKERRQ(ierr); 1506 1507 ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr); 1508 if (ninfo) { 1509 if (ninfo > 80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %d must <= 80\n",ninfo); 1510 ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr); 1511 mumps->ninfo = ninfo; 1512 for (i=0; i<ninfo; i++) { 1513 if (info[i] < 0 || info[i]>80) 
SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %d must be between 1 and 80\n",info[i]); 1514 else mumps->info[i] = info[i]; 1515 } 1516 } 1517 1518 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1519 PetscFunctionReturn(0); 1520 } 1521 1522 PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps) 1523 { 1524 PetscErrorCode ierr; 1525 PetscInt nthreads=0; 1526 1527 PetscFunctionBegin; 1528 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1529 ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRQ(ierr); 1530 ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRQ(ierr); /* so that code like "if (!myid)" still works even if mumps_comm is different */ 1531 1532 ierr = PetscOptionsHasName(NULL,NULL,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr); 1533 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1534 ierr = PetscOptionsGetInt(NULL,NULL,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr); 1535 if (mumps->use_petsc_omp_support) { 1536 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1537 ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr); 1538 ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr); 1539 #else 1540 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -mat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it; see the MATSOLVERMUMPS manual page for details\n"); 1541 #endif 1542 } else { 1543 mumps->omp_comm = PETSC_COMM_SELF; 1544 mumps->mumps_comm = mumps->petsc_comm; 1545 mumps->is_omp_master = PETSC_TRUE; 1546 } 1547 ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRQ(ierr); 1548 1549 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 1550 mumps->id.job = JOB_INIT; 1551 mumps->id.par = 1; /* host participates in factorization and solve */ 1552 mumps->id.sym = mumps->sym; 1553 1554 PetscMUMPS_c(mumps); 1555 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in PetscInitializeMUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1556 1557 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 1558 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
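The broadcast counts below (40 ICNTL entries and 15 CNTL entries) are assumed to match the ICNTL/CNTL array lengths of the MUMPS-5.1.2 C interface this code targets; they are fixed by MUMPS, not tunable PETSc parameters.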
1559 */ 1560 ierr = MPI_Bcast(mumps->id.icntl,40,MPI_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 Manual Section 9 */ 1561 ierr = MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); 1562 1563 mumps->scat_rhs = NULL; 1564 mumps->scat_sol = NULL; 1565 1566 /* set PETSc-MUMPS default options - override MUMPS default */ 1567 mumps->id.ICNTL(3) = 0; 1568 mumps->id.ICNTL(4) = 0; 1569 if (mumps->petsc_size == 1) { 1570 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 1571 } else { 1572 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 1573 mumps->id.ICNTL(20) = 0; /* rhs is in dense format */ 1574 mumps->id.ICNTL(21) = 1; /* distributed solution */ 1575 } 1576 1577 /* schur */ 1578 mumps->id.size_schur = 0; 1579 mumps->id.listvar_schur = NULL; 1580 mumps->id.schur = NULL; 1581 mumps->sizeredrhs = 0; 1582 mumps->schur_sol = NULL; 1583 mumps->schur_sizesol = 0; 1584 PetscFunctionReturn(0); 1585 } 1586 1587 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps) 1588 { 1589 PetscErrorCode ierr; 1590 1591 PetscFunctionBegin; 1592 if (mumps->id.INFOG(1) < 0) { 1593 if (A->erroriffailure) { 1594 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1595 } else { 1596 if (mumps->id.INFOG(1) == -6) { 1597 ierr = PetscInfo2(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1598 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 1599 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 1600 ierr = PetscInfo2(F,"workspace problem, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1601 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1602 } else if (mumps->id.INFOG(1) == -16) { /* INFOG(1)=-16: N is out of range, e.g. the matrix is empty */ 1603 ierr = PetscInfo(F,"Empty matrix\n");CHKERRQ(ierr); 1604 } else { 1605 ierr = PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1606 F->factorerrortype = MAT_FACTOR_OTHER; 1607 } 1608 } 1609 } 1610 PetscFunctionReturn(0); 1611 } 1612 1613 /* Note Petsc r(=c) permutation is used when mumps->id.ICNTL(7)==1 with centralized assembled matrix input; otherwise r and c are ignored */ 1614 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1615 { 1616 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1617 PetscErrorCode ierr; 1618 Vec b; 1619 const PetscInt M = A->rmap->N; 1620 1621 PetscFunctionBegin; 1622 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1623 1624 /* Set MUMPS options from the options database */ 1625 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1626 1627 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1628 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1629 1630 /* analysis phase */ 1631 /*----------------*/ 1632 mumps->id.job = JOB_FACTSYMBOLIC; 1633 mumps->id.n = M; 1634 switch (mumps->id.ICNTL(18)) { 1635 case 0: /* centralized assembled matrix input */ 1636 if (!mumps->myid) { 1637 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1638 if (mumps->id.ICNTL(6)>1) { 1639 mumps->id.a = (MumpsScalar*)mumps->val; 1640 } 1641 if (mumps->id.ICNTL(7) == 1) { /* use user-provided matrix ordering - assuming r = c ordering */ 1642 /* 1643 PetscBool flag; 1644
ierr = ISEqual(r,c,&flag);CHKERRQ(ierr); 1645 if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"row_perm != col_perm"); 1646 ierr = ISView(r,PETSC_VIEWER_STDOUT_SELF); 1647 */ 1648 if (!mumps->myid) { 1649 const PetscInt *idx; 1650 PetscInt i,*perm_in; 1651 1652 ierr = PetscMalloc1(M,&perm_in);CHKERRQ(ierr); 1653 ierr = ISGetIndices(r,&idx);CHKERRQ(ierr); 1654 1655 mumps->id.perm_in = perm_in; 1656 for (i=0; i<M; i++) perm_in[i] = idx[i]+1; /* perm_in[]: start from 1, not 0! */ 1657 ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr); 1658 } 1659 } 1660 } 1661 break; 1662 case 3: /* distributed assembled matrix input (size>1) */ 1663 mumps->id.nz_loc = mumps->nz; 1664 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1665 if (mumps->id.ICNTL(6)>1) { 1666 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1667 } 1668 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1669 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1670 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1671 ierr = VecDestroy(&b);CHKERRQ(ierr); 1672 break; 1673 } 1674 PetscMUMPS_c(mumps); 1675 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1676 1677 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1678 F->ops->solve = MatSolve_MUMPS; 1679 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1680 F->ops->matsolve = MatMatSolve_MUMPS; 1681 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1682 PetscFunctionReturn(0); 1683 } 1684 1685 /* Note the Petsc r and c permutations are ignored */ 1686 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1687 { 1688 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1689 PetscErrorCode ierr; 1690 Vec b; 1691 const PetscInt M = A->rmap->N; 1692 1693 PetscFunctionBegin; 1694 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1695 1696 /* Set MUMPS options from the options database */ 1697 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1698 1699 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1700 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1701 1702 /* analysis phase */ 1703 /*----------------*/ 1704 mumps->id.job = JOB_FACTSYMBOLIC; 1705 mumps->id.n = M; 1706 switch (mumps->id.ICNTL(18)) { 1707 case 0: /* centralized assembled matrix input */ 1708 if (!mumps->myid) { 1709 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1710 if (mumps->id.ICNTL(6)>1) { 1711 mumps->id.a = (MumpsScalar*)mumps->val; 1712 } 1713 } 1714 break; 1715 case 3: /* distributed assembled matrix input (size>1) */ 1716 mumps->id.nz_loc = mumps->nz; 1717 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1718 if (mumps->id.ICNTL(6)>1) { 1719 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1720 } 1721 /* MUMPS only supports centralized rhs. 
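Entries of a distributed rhs therefore have to be gathered onto rank 0 before every solve.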
Create scatter scat_rhs for repeated use in MatSolve() */ 1722 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1723 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1724 ierr = VecDestroy(&b);CHKERRQ(ierr); 1725 break; 1726 } 1727 PetscMUMPS_c(mumps); 1728 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1729 1730 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1731 F->ops->solve = MatSolve_MUMPS; 1732 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1733 PetscFunctionReturn(0); 1734 } 1735 1736 /* Note the Petsc r permutation and factor info are ignored */ 1737 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info) 1738 { 1739 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1740 PetscErrorCode ierr; 1741 Vec b; 1742 const PetscInt M = A->rmap->N; 1743 1744 PetscFunctionBegin; 1745 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1746 1747 /* Set MUMPS options from the options database */ 1748 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1749 1750 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1751 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1752 1753 /* analysis phase */ 1754 /*----------------*/ 1755 mumps->id.job = JOB_FACTSYMBOLIC; 1756 mumps->id.n = M; 1757 switch (mumps->id.ICNTL(18)) { 1758 case 0: /* centralized assembled matrix input */ 1759 if (!mumps->myid) { 1760 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1761 if (mumps->id.ICNTL(6)>1) { 1762 mumps->id.a = (MumpsScalar*)mumps->val; 1763 } 1764 } 1765 break; 1766 case 3: /* distributed assembled matrix input (size>1) */ 1767 mumps->id.nz_loc = mumps->nz; 1768 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1769 if (mumps->id.ICNTL(6)>1) { 1770 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1771 } 1772 /* MUMPS only supports centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 1773 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1774 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1775 ierr = VecDestroy(&b);CHKERRQ(ierr); 1776 break; 1777 } 1778 PetscMUMPS_c(mumps); 1779 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1780 1781 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 1782 F->ops->solve = MatSolve_MUMPS; 1783 F->ops->solvetranspose = MatSolve_MUMPS; 1784 F->ops->matsolve = MatMatSolve_MUMPS; 1785 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1786 #if defined(PETSC_USE_COMPLEX) 1787 F->ops->getinertia = NULL; 1788 #else 1789 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 1790 #endif 1791 PetscFunctionReturn(0); 1792 } 1793 1794 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) 1795 { 1796 PetscErrorCode ierr; 1797 PetscBool iascii; 1798 PetscViewerFormat format; 1799 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1800 1801 PetscFunctionBegin; 1802 /* check if matrix is mumps type */ 1803 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0); 1804 1805 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1806 if (iascii) { 1807 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1808 if (format == PETSC_VIEWER_ASCII_INFO) { 1809 ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr); 1810 ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",mumps->id.sym);CHKERRQ(ierr); 1811 ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",mumps->id.par);CHKERRQ(ierr); 1812 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d \n",mumps->id.ICNTL(1));CHKERRQ(ierr); 1813 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d \n",mumps->id.ICNTL(2));CHKERRQ(ierr); 1814 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d \n",mumps->id.ICNTL(3));CHKERRQ(ierr); 1815 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",mumps->id.ICNTL(4));CHKERRQ(ierr); 1816 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",mumps->id.ICNTL(5));CHKERRQ(ierr); 1817 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",mumps->id.ICNTL(6));CHKERRQ(ierr); 1818 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d \n",mumps->id.ICNTL(7));CHKERRQ(ierr); 1819 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d \n",mumps->id.ICNTL(8));CHKERRQ(ierr); 1820 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",mumps->id.ICNTL(10));CHKERRQ(ierr); 1821 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",mumps->id.ICNTL(11));CHKERRQ(ierr); 1822 if (mumps->id.ICNTL(11)>0) { 1823 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr); 1824 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr); 1825 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6));CHKERRQ(ierr); 1826 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr); 1827 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr); 1828 ierr = PetscViewerASCIIPrintf(viewer," 
RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr); 1829 } 1830 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",mumps->id.ICNTL(12));CHKERRQ(ierr); 1831 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (efficiency control): %d \n",mumps->id.ICNTL(13));CHKERRQ(ierr); 1832 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",mumps->id.ICNTL(14));CHKERRQ(ierr); 1833 /* ICNTL(15-17) not used */ 1834 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",mumps->id.ICNTL(18));CHKERRQ(ierr); 1835 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d \n",mumps->id.ICNTL(19));CHKERRQ(ierr); 1836 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(20) (rhs sparse pattern): %d \n",mumps->id.ICNTL(20));CHKERRQ(ierr); 1837 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d \n",mumps->id.ICNTL(21));CHKERRQ(ierr); 1838 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d \n",mumps->id.ICNTL(22));CHKERRQ(ierr); 1839 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory that can be allocated locally):%d \n",mumps->id.ICNTL(23));CHKERRQ(ierr); 1840 1841 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d \n",mumps->id.ICNTL(24));CHKERRQ(ierr); 1842 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d \n",mumps->id.ICNTL(25));CHKERRQ(ierr); 1843 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for rhs or solution): %d \n",mumps->id.ICNTL(26));CHKERRQ(ierr); 1844 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(27) (experimental parameter): %d \n",mumps->id.ICNTL(27));CHKERRQ(ierr); 1845 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d \n",mumps->id.ICNTL(28));CHKERRQ(ierr); 1846 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d \n",mumps->id.ICNTL(29));CHKERRQ(ierr); 1847 1848 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d \n",mumps->id.ICNTL(30));CHKERRQ(ierr); 1849 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(31) (factors are discarded in the solve phase): %d \n",mumps->id.ICNTL(31));CHKERRQ(ierr); 1850 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d \n",mumps->id.ICNTL(33));CHKERRQ(ierr); 1851 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d \n",mumps->id.ICNTL(35));CHKERRQ(ierr); 1852 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(36) (choice of BLR factorization variant): %d \n",mumps->id.ICNTL(36));CHKERRQ(ierr); 1853 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(38) (estimated compression rate of LU factors): %d \n",mumps->id.ICNTL(38));CHKERRQ(ierr); 1854 1855 ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1));CHKERRQ(ierr); 1856 ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr); 1857 ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3));CHKERRQ(ierr); 1858 ierr = PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4));CHKERRQ(ierr); 1859 ierr = PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5));CHKERRQ(ierr); 1860 ierr = PetscViewerASCIIPrintf(viewer," CNTL(7)
(dropping parameter for BLR): %g \n",mumps->id.CNTL(7));CHKERRQ(ierr); 1861 1862 /* information local to each processor */ 1863 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr); 1864 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1865 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr); 1866 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1867 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr); 1868 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr); 1869 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1870 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr); 1871 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr); 1872 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1873 1874 ierr = PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size (in MB) of MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr); 1875 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr); 1876 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1877 1878 ierr = PetscViewerASCIIPrintf(viewer, " INFO(16) (size (in MB) of MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr); 1879 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr); 1880 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1881 1882 ierr = PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr); 1883 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr); 1884 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1885 1886 if (mumps->ninfo && mumps->ninfo <= 80){ 1887 PetscInt i; 1888 for (i=0; i<mumps->ninfo; i++){ 1889 ierr = PetscViewerASCIIPrintf(viewer, " INFO(%d): \n",mumps->info[i]);CHKERRQ(ierr); 1890 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(mumps->info[i]));CHKERRQ(ierr); 1891 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1892 } 1893 } 1894 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1895 1896 if (!mumps->myid) { /* information from the host */ 1897 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr); 1898 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr); 1899 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr); 1900 ierr = PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr); 1901 1902 ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(3));CHKERRQ(ierr); 1903 ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis):
%d \n",mumps->id.INFOG(4));CHKERRQ(ierr); 1904 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",mumps->id.INFOG(5));CHKERRQ(ierr); 1905 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",mumps->id.INFOG(6));CHKERRQ(ierr); 1906 ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively use after analysis): %d \n",mumps->id.INFOG(7));CHKERRQ(ierr); 1907 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",mumps->id.INFOG(8));CHKERRQ(ierr); 1908 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",mumps->id.INFOG(9));CHKERRQ(ierr); 1909 ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d \n",mumps->id.INFOG(10));CHKERRQ(ierr); 1910 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d \n",mumps->id.INFOG(11));CHKERRQ(ierr); 1911 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",mumps->id.INFOG(12));CHKERRQ(ierr); 1912 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",mumps->id.INFOG(13));CHKERRQ(ierr); 1913 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",mumps->id.INFOG(14));CHKERRQ(ierr); 1914 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",mumps->id.INFOG(15));CHKERRQ(ierr); 1915 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",mumps->id.INFOG(16));CHKERRQ(ierr); 1916 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",mumps->id.INFOG(17));CHKERRQ(ierr); 1917 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",mumps->id.INFOG(18));CHKERRQ(ierr); 1918 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",mumps->id.INFOG(19));CHKERRQ(ierr); 1919 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",mumps->id.INFOG(20));CHKERRQ(ierr); 1920 ierr = PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",mumps->id.INFOG(21));CHKERRQ(ierr); 1921 ierr = PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",mumps->id.INFOG(22));CHKERRQ(ierr); 1922 ierr = PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",mumps->id.INFOG(23));CHKERRQ(ierr); 1923 ierr = PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",mumps->id.INFOG(24));CHKERRQ(ierr); 1924 ierr = PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",mumps->id.INFOG(25));CHKERRQ(ierr); 1925 ierr = 
PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));CHKERRQ(ierr); 1926 ierr = PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));CHKERRQ(ierr); 1927 ierr = PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));CHKERRQ(ierr); 1928 ierr = PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));CHKERRQ(ierr); 1929 ierr = PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));CHKERRQ(ierr); 1930 ierr = PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));CHKERRQ(ierr); 1931 ierr = PetscViewerASCIIPrintf(viewer," INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));CHKERRQ(ierr); 1932 ierr = PetscViewerASCIIPrintf(viewer," INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(36));CHKERRQ(ierr); 1933 ierr = PetscViewerASCIIPrintf(viewer," INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d \n",mumps->id.INFOG(37));CHKERRQ(ierr); 1934 ierr = PetscViewerASCIIPrintf(viewer," INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(38));CHKERRQ(ierr); 1935 ierr = PetscViewerASCIIPrintf(viewer," INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d \n",mumps->id.INFOG(39));CHKERRQ(ierr); 1936 } 1937 } 1938 } 1939 PetscFunctionReturn(0); 1940 } 1941 1942 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info) 1943 { 1944 Mat_MUMPS *mumps =(Mat_MUMPS*)A->data; 1945 1946 PetscFunctionBegin; 1947 info->block_size = 1.0; 1948 info->nz_allocated = mumps->id.INFOG(20); 1949 info->nz_used = mumps->id.INFOG(20); 1950 info->nz_unneeded = 0.0; 1951 info->assemblies = 0.0; 1952 info->mallocs = 0.0; 1953 info->memory = 0.0; 1954 info->fill_ratio_given = 0; 1955 info->fill_ratio_needed = 0; 1956 info->factor_mallocs = 0; 1957 PetscFunctionReturn(0); 1958 } 1959 1960 /* -------------------------------------------------------------------------------------------*/ 1961 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 1962 { 1963 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1964 const PetscScalar *arr; 1965 const PetscInt *idxs; 1966 PetscInt size,i; 1967 PetscErrorCode ierr; 1968 1969 PetscFunctionBegin; 1970 ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr); 1971 if (mumps->petsc_size > 1) { 1972 PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */ 1973 1974 ls = mumps->myid ? (size ? 
PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 1975 ierr = MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);CHKERRQ(ierr); 1976 if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported by PETSc\n"); 1977 } 1978 1979 /* Schur complement matrix */ 1980 ierr = MatDestroy(&F->schur);CHKERRQ(ierr); 1981 ierr = MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur);CHKERRQ(ierr); 1982 ierr = MatDenseGetArrayRead(F->schur,&arr);CHKERRQ(ierr); 1983 mumps->id.schur = (MumpsScalar*)arr; 1984 mumps->id.size_schur = size; 1985 mumps->id.schur_lld = size; 1986 ierr = MatDenseRestoreArrayRead(F->schur,&arr);CHKERRQ(ierr); 1987 if (mumps->sym == 1) { 1988 ierr = MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 1989 } 1990 1991 /* MUMPS expects Fortran style indices */ 1992 ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr); 1993 ierr = PetscMalloc1(size,&mumps->id.listvar_schur);CHKERRQ(ierr); 1994 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 1995 ierr = PetscArraycpy(mumps->id.listvar_schur,idxs,size);CHKERRQ(ierr); 1996 for (i=0;i<size;i++) mumps->id.listvar_schur[i]++; 1997 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 1998 if (mumps->petsc_size > 1) { 1999 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2000 } else { 2001 if (F->factortype == MAT_FACTOR_LU) { 2002 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2003 } else { 2004 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2005 } 2006 } 2007 /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */ 2008 mumps->id.ICNTL(26) = -1; 2009 PetscFunctionReturn(0); 2010 } 2011 2012 /* -------------------------------------------------------------------------------------------*/ 2013 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S) 2014 { 2015 Mat St; 2016 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2017 PetscScalar *array; 2018 #if defined(PETSC_USE_COMPLEX) 2019 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 2020 #endif 2021 PetscErrorCode ierr; 2022 2023 PetscFunctionBegin; 2024 if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected!
You should call MatFactorSetSchurIS to enable it"); 2025 ierr = MatCreate(PETSC_COMM_SELF,&St);CHKERRQ(ierr); 2026 ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);CHKERRQ(ierr); 2027 ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr); 2028 ierr = MatSetUp(St);CHKERRQ(ierr); 2029 ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr); 2030 if (!mumps->sym) { /* MUMPS always returns a full matrix */ 2031 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2032 PetscInt i,j,N=mumps->id.size_schur; 2033 for (i=0;i<N;i++) { 2034 for (j=0;j<N;j++) { 2035 #if !defined(PETSC_USE_COMPLEX) 2036 PetscScalar val = mumps->id.schur[i*N+j]; 2037 #else 2038 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2039 #endif 2040 array[j*N+i] = val; 2041 } 2042 } 2043 } else { /* stored by columns */ 2044 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 2045 } 2046 } else { /* either full or lower-triangular (not packed) */ 2047 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2048 PetscInt i,j,N=mumps->id.size_schur; 2049 for (i=0;i<N;i++) { 2050 for (j=i;j<N;j++) { 2051 #if !defined(PETSC_USE_COMPLEX) 2052 PetscScalar val = mumps->id.schur[i*N+j]; 2053 #else 2054 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2055 #endif 2056 array[i*N+j] = val; 2057 array[j*N+i] = val; 2058 } 2059 } 2060 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2061 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 2062 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2063 PetscInt i,j,N=mumps->id.size_schur; 2064 for (i=0;i<N;i++) { 2065 for (j=0;j<i+1;j++) { 2066 #if !defined(PETSC_USE_COMPLEX) 2067 PetscScalar val = mumps->id.schur[i*N+j]; 2068 #else 2069 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2070 #endif 2071 array[i*N+j] = val; 2072 array[j*N+i] = val; 2073 } 2074 } 2075 } 2076 } 2077 ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr); 2078 *S = St; 2079 PetscFunctionReturn(0); 2080 } 2081 2082 /* -------------------------------------------------------------------------------------------*/ 2083 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival) 2084 { 2085 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2086 2087 PetscFunctionBegin; 2088 mumps->id.ICNTL(icntl) = ival; 2089 PetscFunctionReturn(0); 2090 } 2091 2092 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival) 2093 { 2094 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2095 2096 PetscFunctionBegin; 2097 *ival = mumps->id.ICNTL(icntl); 2098 PetscFunctionReturn(0); 2099 } 2100 2101 /*@ 2102 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2103 2104 Logically Collective on Mat 2105 2106 Input Parameters: 2107 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2108 . icntl - index of MUMPS parameter array ICNTL() 2109 - ival - value of MUMPS ICNTL(icntl) 2110 2111 Options Database: 2112 . -mat_mumps_icntl_<icntl> <ival> 2113 2114 Level: beginner 2115 2116 References: 2117 .
MUMPS Users' Guide 2118 2119 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2120 @*/ 2121 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival) 2122 { 2123 PetscErrorCode ierr; 2124 2125 PetscFunctionBegin; 2126 PetscValidType(F,1); 2127 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2128 PetscValidLogicalCollectiveInt(F,icntl,2); 2129 PetscValidLogicalCollectiveInt(F,ival,3); 2130 ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr); 2131 PetscFunctionReturn(0); 2132 } 2133 2134 /*@ 2135 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2136 2137 Logically Collective on Mat 2138 2139 Input Parameters: 2140 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2141 - icntl - index of MUMPS parameter array ICNTL() 2142 2143 Output Parameter: 2144 . ival - value of MUMPS ICNTL(icntl) 2145 2146 Level: beginner 2147 2148 References: 2149 . MUMPS Users' Guide 2150 2151 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2152 @*/ 2153 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival) 2154 { 2155 PetscErrorCode ierr; 2156 2157 PetscFunctionBegin; 2158 PetscValidType(F,1); 2159 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2160 PetscValidLogicalCollectiveInt(F,icntl,2); 2161 PetscValidIntPointer(ival,3); 2162 ierr = PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2163 PetscFunctionReturn(0); 2164 } 2165 2166 /* -------------------------------------------------------------------------------------------*/ 2167 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val) 2168 { 2169 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2170 2171 PetscFunctionBegin; 2172 mumps->id.CNTL(icntl) = val; 2173 PetscFunctionReturn(0); 2174 } 2175 2176 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val) 2177 { 2178 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2179 2180 PetscFunctionBegin; 2181 *val = mumps->id.CNTL(icntl); 2182 PetscFunctionReturn(0); 2183 } 2184 2185 /*@ 2186 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2187 2188 Logically Collective on Mat 2189 2190 Input Parameters: 2191 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2192 . icntl - index of MUMPS parameter array CNTL() 2193 - val - value of MUMPS CNTL(icntl) 2194 2195 Options Database: 2196 . -mat_mumps_cntl_<icntl> <val> 2197 2198 Level: beginner 2199 2200 References: 2201 . 
MUMPS Users' Guide 2202 2203 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2204 @*/ 2205 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val) 2206 { 2207 PetscErrorCode ierr; 2208 2209 PetscFunctionBegin; 2210 PetscValidType(F,1); 2211 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2212 PetscValidLogicalCollectiveInt(F,icntl,2); 2213 PetscValidLogicalCollectiveReal(F,val,3); 2214 ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr); 2215 PetscFunctionReturn(0); 2216 } 2217 2218 /*@ 2219 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2220 2221 Logically Collective on Mat 2222 2223 Input Parameters: 2224 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2225 - icntl - index of MUMPS parameter array CNTL() 2226 2227 Output Parameter: 2228 . val - value of MUMPS CNTL(icntl) 2229 2230 Level: beginner 2231 2232 References: 2233 . MUMPS Users' Guide 2234 2235 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2236 @*/ 2237 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val) 2238 { 2239 PetscErrorCode ierr; 2240 2241 PetscFunctionBegin; 2242 PetscValidType(F,1); 2243 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2244 PetscValidLogicalCollectiveInt(F,icntl,2); 2245 PetscValidRealPointer(val,3); 2246 ierr = PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2247 PetscFunctionReturn(0); 2248 } 2249 2250 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info) 2251 { 2252 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2253 2254 PetscFunctionBegin; 2255 *info = mumps->id.INFO(icntl); 2256 PetscFunctionReturn(0); 2257 } 2258 2259 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog) 2260 { 2261 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2262 2263 PetscFunctionBegin; 2264 *infog = mumps->id.INFOG(icntl); 2265 PetscFunctionReturn(0); 2266 } 2267 2268 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo) 2269 { 2270 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2271 2272 PetscFunctionBegin; 2273 *rinfo = mumps->id.RINFO(icntl); 2274 PetscFunctionReturn(0); 2275 } 2276 2277 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog) 2278 { 2279 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2280 2281 PetscFunctionBegin; 2282 *rinfog = mumps->id.RINFOG(icntl); 2283 PetscFunctionReturn(0); 2284 } 2285 2286 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS) 2287 { 2288 PetscErrorCode ierr; 2289 Mat Bt = NULL,Btseq = NULL; 2290 PetscBool flg; 2291 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2292 PetscScalar *aa; 2293 PetscInt spnr,*ia,*ja,M,nrhs; 2294 2295 PetscFunctionBegin; 2296 PetscValidHeaderSpecific(spRHS,MAT_CLASSID,2); 2297 ierr = PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);CHKERRQ(ierr); 2298 if (flg) { 2299 ierr = MatTransposeGetMat(spRHS,&Bt);CHKERRQ(ierr); 2300 } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be of type MATTRANSPOSEMAT"); 2301 2302 ierr = MatMumpsSetIcntl(F,30,1);CHKERRQ(ierr); 2303 2304 if (mumps->petsc_size > 1) { 2305 Mat_MPIAIJ *b
= (Mat_MPIAIJ*)Bt->data; 2306 Btseq = b->A; 2307 } else { 2308 Btseq = Bt; 2309 } 2310 2311 ierr = MatGetSize(spRHS,&M,&nrhs);CHKERRQ(ierr); 2312 mumps->id.nrhs = nrhs; 2313 mumps->id.lrhs = M; 2314 mumps->id.rhs = NULL; 2315 2316 if (!mumps->myid) { 2317 ierr = MatSeqAIJGetArray(Btseq,&aa);CHKERRQ(ierr); 2318 ierr = MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2319 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2320 2321 mumps->id.irhs_ptr = ia; 2322 mumps->id.irhs_sparse = ja; 2323 mumps->id.nz_rhs = ia[spnr] - 1; 2324 mumps->id.rhs_sparse = (MumpsScalar*)aa; 2325 } else { 2326 mumps->id.irhs_ptr = NULL; 2327 mumps->id.irhs_sparse = NULL; 2328 mumps->id.nz_rhs = 0; 2329 mumps->id.rhs_sparse = NULL; 2330 } 2331 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2332 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2333 2334 /* solve phase */ 2335 /*-------------*/ 2336 mumps->id.job = JOB_SOLVE; 2337 PetscMUMPS_c(mumps); 2338 if (mumps->id.INFOG(1) < 0) 2339 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 2340 2341 if (!mumps->myid) { 2342 ierr = MatSeqAIJRestoreArray(Btseq,&aa);CHKERRQ(ierr); 2343 ierr = MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2344 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2345 } 2346 PetscFunctionReturn(0); 2347 } 2348 2349 /*@ 2350 MatMumpsGetInverse - Get user-specified set of entries in inverse of A 2351 2352 Logically Collective on Mat 2353 2354 Input Parameters: 2355 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2356 - spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0] 2357 2358 Output Parameter: 2359 . spRHS - requested entries of inverse of A 2360 2361 Level: beginner 2362 2363 References: 2364 . MUMPS Users' Guide 2365 2366 .seealso: MatGetFactor(), MatCreateTranspose() 2367 @*/ 2368 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS) 2369 { 2370 PetscErrorCode ierr; 2371 2372 PetscFunctionBegin; 2373 PetscValidType(F,1); 2374 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2375 ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr); 2376 PetscFunctionReturn(0); 2377 } 2378 2379 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST) 2380 { 2381 PetscErrorCode ierr; 2382 Mat spRHS; 2383 2384 PetscFunctionBegin; 2385 ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr); 2386 ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr); 2387 ierr = MatDestroy(&spRHS);CHKERRQ(ierr); 2388 PetscFunctionReturn(0); 2389 } 2390 2391 /*@ 2392 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T 2393 2394 Logically Collective on Mat 2395 2396 Input Parameters: 2397 + F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface 2398 - spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0] 2399 2400 Output Parameter: 2401 . spRHST - requested entries of inverse of A^T 2402 2403 Level: beginner 2404 2405 References: 2406 . 
MUMPS Users' Guide 2407 2408 .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse() 2409 @*/ 2410 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST) 2411 { 2412 PetscErrorCode ierr; 2413 PetscBool flg; 2414 2415 PetscFunctionBegin; 2416 PetscValidType(F,1); 2417 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2418 ierr = PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 2419 if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be a MATAIJ matrix"); 2420 2421 ierr = PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));CHKERRQ(ierr); 2422 PetscFunctionReturn(0); 2423 } 2424 2425 /*@ 2426 MatMumpsGetInfo - Get MUMPS parameter INFO() 2427 2428 Logically Collective on Mat 2429 2430 Input Parameters: 2431 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2432 - icntl - index of MUMPS parameter array INFO() 2433 2434 Output Parameter: 2435 . ival - value of MUMPS INFO(icntl) 2436 2437 Level: beginner 2438 2439 References: 2440 . MUMPS Users' Guide 2441 2442 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2443 @*/ 2444 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival) 2445 { 2446 PetscErrorCode ierr; 2447 2448 PetscFunctionBegin; 2449 PetscValidType(F,1); 2450 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2451 PetscValidIntPointer(ival,3); 2452 ierr = PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2453 PetscFunctionReturn(0); 2454 } 2455 2456 /*@ 2457 MatMumpsGetInfog - Get MUMPS parameter INFOG() 2458 2459 Logically Collective on Mat 2460 2461 Input Parameters: 2462 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2463 - icntl - index of MUMPS parameter array INFOG() 2464 2465 Output Parameter: 2466 . ival - value of MUMPS INFOG(icntl) 2467 2468 Level: beginner 2469 2470 References: 2471 . MUMPS Users' Guide 2472 2473 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2474 @*/ 2475 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival) 2476 { 2477 PetscErrorCode ierr; 2478 2479 PetscFunctionBegin; 2480 PetscValidType(F,1); 2481 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2482 PetscValidIntPointer(ival,3); 2483 ierr = PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2484 PetscFunctionReturn(0); 2485 } 2486 2487 /*@ 2488 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 2489 2490 Logically Collective on Mat 2491 2492 Input Parameters: 2493 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2494 - icntl - index of MUMPS parameter array RINFO() 2495 2496 Output Parameter: 2497 . val - value of MUMPS RINFO(icntl) 2498 2499 Level: beginner 2500 2501 References: 2502 .
MUMPS Users' Guide 2503 2504 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2505 @*/ 2506 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val) 2507 { 2508 PetscErrorCode ierr; 2509 2510 PetscFunctionBegin; 2511 PetscValidType(F,1); 2512 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2513 PetscValidRealPointer(val,3); 2514 ierr = PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2515 PetscFunctionReturn(0); 2516 } 2517 2518 /*@ 2519 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 2520 2521 Logically Collective on Mat 2522 2523 Input Parameters: 2524 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2525 - icntl - index of MUMPS parameter array RINFOG() 2526 2527 Output Parameter: 2528 . val - value of MUMPS RINFOG(icntl) 2529 2530 Level: beginner 2531 2532 References: 2533 . MUMPS Users' Guide 2534 2535 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2536 @*/ 2537 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val) 2538 { 2539 PetscErrorCode ierr; 2540 2541 PetscFunctionBegin; 2542 PetscValidType(F,1); 2543 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2544 PetscValidRealPointer(val,3); 2545 ierr = PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2546 PetscFunctionReturn(0); 2547 } 2548 2549 /*MC 2550 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 2551 distributed and sequential matrices via the external package MUMPS. 2552 2553 Works with MATAIJ and MATSBAIJ matrices 2554 2555 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 2556 2557 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below. 2558 2559 Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver 2560 2561 Options Database Keys: 2562 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 2563 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 2564 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 2565 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 2566 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 2567 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis 2568 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 2569 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 2570 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 2571 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 2572 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 2573 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 2574 . 
-mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 2575 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 2576 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor 2577 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 2578 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 2579 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix is present 2580 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering 2581 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 2582 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 2583 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 2584 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 2585 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 2586 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 2587 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 2588 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 2589 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 2590 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 2591 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 2592 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 2593 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 2594 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 2595 The default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 2596 2597 Level: beginner 2598 2599 Notes: 2600 MUMPS Cholesky does not handle (complex) Hermitian matrices (see the MUMPS user guide, http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf), so using it with a Hermitian matrix will generate an error. 2601 2602 When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling 2603 $ KSPGetPC(ksp,&pc); 2604 $ PCFactorGetMatrix(pc,&mat); 2605 $ MatMumpsGetInfo(mat,....); 2606 $ MatMumpsGetInfog(mat,....); etc. 2607 Alternatively, run with -ksp_error_if_not_converged and the program will stop with the MUMPS information printed in the error message. 2608 2609 There are two modes to run MUMPS/PETSc with OpenMP 2610 2611 $ Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 2612 $ threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". 2613 2614 $ -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example, 2615 $ if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 2616 2617 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 2618 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc 2619 (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available).
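   For example, combining the configure options mentioned above, a configure command enabling this mode might look like the following sketch (illustrative only; adapt the option list to your system):
$      ./configure --with-openmp --download-hwloc --download-mumps --download-scalapack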
  There are two modes to run MUMPS/PETSc with OpenMP:

$     1. Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
$     threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".

$     2. Use -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
$     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"

  To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
  (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc
  (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
  libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
  (PETSc will automatically try to use a threaded BLAS if --with-openmp is provided).

  If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain the MPI
  processes on each compute node. Listing the processes in rank-ascending order, we split the processes on a node into consecutive groups of
  size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and the others in the omp_comm
  are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. We free the CPUs assigned to slaves (which might be set
  by CPU binding policies in job scripts) and make those CPUs available to the master so that OMP threads spawned by MUMPS can run on them.
  In a multi-socket compute node, MPI rank mapping becomes an issue. Continuing the above example, suppose your compute node has two sockets.
  If MPI ranks are interleaved across the two sockets (even ranks placed on socket 0 and odd ranks on socket 1) and bound to cores, then with
  -mat_mumps_use_omp_threads 16 a master rank (and the threads it spawns) will use half of the cores in socket 0 and half of the cores in
  socket 1, which clearly hurts locality. If instead MPI ranks are mapped consecutively onto the two sockets, the problem does not arise.
  Therefore, when you use -mat_mumps_use_omp_threads, keep an eye on your MPI rank mapping and CPU binding.
  For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
  examine the mapping result.

  PETSc does not control thread binding in MUMPS. So to get the best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts,
  for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts, as PETSc
  calls omp_set_num_threads(m) internally before calling MUMPS.

  References:
+  1. - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
-  2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.

.seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCGetFactor(), PCFactorGetMatrix()

M*/
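/*
  A sketch of selecting this solver from code rather than the options database; it
  mirrors what -pc_type lu -pc_factor_mat_solver_type mumps does at runtime. A, b, and
  x are assumed to be an already assembled matrix and vectors, ierr a PetscErrorCode.

    KSP ksp;
    PC  pc;
    ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
    ierr = KSPSetOperators(ksp,A,A);CHKERRQ(ierr);
    ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
    ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
    ierr = PCSetType(pc,PCLU);CHKERRQ(ierr);
    ierr = PCFactorSetMatSolverType(pc,MATSOLVERMUMPS);CHKERRQ(ierr);
    ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
    ierr = KSPDestroy(&ksp);CHKERRQ(ierr);
*/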
static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERMUMPS;
  PetscFunctionReturn(0);
}

/* MatGetFactor for Seq and MPI AIJ matrices */
static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqAIJ;

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  if (A->hermitian && !A->symmetric && ftype == MAT_FACTOR_CHOLESKY) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
#endif
  /* Create the factorization matrix */
  ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
    else          mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
    mumps->sym = 0;
  } else {
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
    B->factortype                  = MAT_FACTOR_CHOLESKY;
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
    else          mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
#if defined(PETSC_USE_COMPLEX)
    mumps->sym = 2;
#else
    if (A->spd_set && A->spd) mumps->sym = 1;
    else                      mumps->sym = 2;
#endif
  }

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}
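/*
  How the factory above is typically reached through the Mat API (a sketch; A is an
  assembled AIJ matrix, b and x are vectors, ierr a PetscErrorCode; MUMPS computes its
  own ordering, so the natural ordering is passed only to satisfy the interface):

    Mat           F;
    IS            rperm,cperm;
    MatFactorInfo info;
    ierr = MatGetOrdering(A,MATORDERINGNATURAL,&rperm,&cperm);CHKERRQ(ierr);
    ierr = MatFactorInfoInitialize(&info);CHKERRQ(ierr);
    ierr = MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);CHKERRQ(ierr);
    ierr = MatLUFactorSymbolic(F,A,rperm,cperm,&info);CHKERRQ(ierr);
    ierr = MatLUFactorNumeric(F,A,&info);CHKERRQ(ierr);
    ierr = MatSolve(F,b,x);CHKERRQ(ierr);
*/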
/* MatGetFactor for Seq and MPI SBAIJ matrices */
static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqSBAIJ;

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  if (A->hermitian && !A->symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
#endif
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr);
  if (isSeqSBAIJ) {
    mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
  } else {
    mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
  }

  B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
  B->ops->view                   = MatView_MUMPS;
  B->ops->getinfo                = MatGetInfo_MUMPS;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  B->factortype = MAT_FACTOR_CHOLESKY;
#if defined(PETSC_USE_COMPLEX)
  mumps->sym = 2;
#else
  if (A->spd_set && A->spd) mumps->sym = 1;
  else                      mumps->sym = 2;
#endif

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}
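/*
  Note on the sym value chosen above: in the real case, MUMPS runs with SYM=1
  (symmetric positive definite) only when the matrix has been flagged SPD; otherwise
  the general symmetric path SYM=2 is used. A caller can opt in with, e.g.
  (a sketch; A is the matrix to be factored):

    ierr = MatSetOption(A,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);
*/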
static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqBAIJ;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
    else           mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
    mumps->sym = 0;
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}
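/*
  As the error path above states, MUMPS Cholesky is not wired up for BAIJ storage; a
  BAIJ matrix can first be converted, e.g. (a sketch; A is an assembled BAIJ matrix):

    Mat Asbaij;
    ierr = MatConvert(A,MATSBAIJ,MAT_INITIAL_MATRIX,&Asbaij);CHKERRQ(ierr);
*/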
/* MatGetFactor for Seq and MPI SELL matrices */
static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqSELL;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
    else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
    mumps->sym = 0;
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}
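/*
  The registrations made below can be probed at runtime before requesting a factor,
  e.g. (a sketch; A is any assembled Mat, ierr a PetscErrorCode):

    PetscBool flg;
    ierr = MatGetFactorAvailable(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&flg);CHKERRQ(ierr);
*/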
PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}