1 2 /* 3 Provides an interface to the MUMPS sparse solver 4 */ 5 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 8 #include <../src/mat/impls/sell/mpi/mpisell.h> 9 10 EXTERN_C_BEGIN 11 #if defined(PETSC_USE_COMPLEX) 12 #if defined(PETSC_USE_REAL_SINGLE) 13 #include <cmumps_c.h> 14 #else 15 #include <zmumps_c.h> 16 #endif 17 #else 18 #if defined(PETSC_USE_REAL_SINGLE) 19 #include <smumps_c.h> 20 #else 21 #include <dmumps_c.h> 22 #endif 23 #endif 24 EXTERN_C_END 25 #define JOB_INIT -1 26 #define JOB_FACTSYMBOLIC 1 27 #define JOB_FACTNUMERIC 2 28 #define JOB_SOLVE 3 29 #define JOB_END -2 30 31 /* calls to MUMPS */ 32 #if defined(PETSC_USE_COMPLEX) 33 #if defined(PETSC_USE_REAL_SINGLE) 34 #define MUMPS_c cmumps_c 35 #else 36 #define MUMPS_c zmumps_c 37 #endif 38 #else 39 #if defined(PETSC_USE_REAL_SINGLE) 40 #define MUMPS_c smumps_c 41 #else 42 #define MUMPS_c dmumps_c 43 #endif 44 #endif 45 46 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */ 47 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 48 #define PetscMUMPS_c(mumps) \ 49 do { \ 50 if (mumps->use_petsc_omp_support) { \ 51 if (mumps->is_omp_master) { \ 52 ierr = PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl);CHKERRQ(ierr); \ 53 MUMPS_c(&mumps->id); \ 54 ierr = PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl);CHKERRQ(ierr); \ 55 } \ 56 ierr = PetscOmpCtrlBarrier(mumps->omp_ctrl);CHKERRQ(ierr); \ 57 /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \ 58 to processes, so we only Bcast info[1], an error code and leave others (since they do not have \ 59 an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \ 60 omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \ 61 */ \ 62 ierr = MPI_Bcast(mumps->id.infog, 40,MPI_INT, 0,mumps->omp_comm);CHKERRQ(ierr); \ 63 ierr = MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); \ 64 ierr = MPI_Bcast(mumps->id.info, 1, MPI_INT, 0,mumps->omp_comm);CHKERRQ(ierr); \ 65 } else { \ 66 MUMPS_c(&mumps->id); \ 67 } \ 68 } while(0) 69 #else 70 #define PetscMUMPS_c(mumps) \ 71 do { MUMPS_c(&mumps->id); } while (0) 72 #endif 73 74 /* declare MumpsScalar */ 75 #if defined(PETSC_USE_COMPLEX) 76 #if defined(PETSC_USE_REAL_SINGLE) 77 #define MumpsScalar mumps_complex 78 #else 79 #define MumpsScalar mumps_double_complex 80 #endif 81 #else 82 #define MumpsScalar PetscScalar 83 #endif 84 85 /* macros s.t. indices match MUMPS documentation */ 86 #define ICNTL(I) icntl[(I)-1] 87 #define CNTL(I) cntl[(I)-1] 88 #define INFOG(I) infog[(I)-1] 89 #define INFO(I) info[(I)-1] 90 #define RINFOG(I) rinfog[(I)-1] 91 #define RINFO(I) rinfo[(I)-1] 92 93 typedef struct { 94 #if defined(PETSC_USE_COMPLEX) 95 #if defined(PETSC_USE_REAL_SINGLE) 96 CMUMPS_STRUC_C id; 97 #else 98 ZMUMPS_STRUC_C id; 99 #endif 100 #else 101 #if defined(PETSC_USE_REAL_SINGLE) 102 SMUMPS_STRUC_C id; 103 #else 104 DMUMPS_STRUC_C id; 105 #endif 106 #endif 107 108 MatStructure matstruc; 109 PetscMPIInt myid,petsc_size; 110 PetscInt *irn,*jcn,nz,sym; 111 PetscScalar *val; 112 MPI_Comm mumps_comm; 113 PetscInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 114 VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 115 Vec b_seq,x_seq; 116 PetscInt ninfo,*info; /* display INFO */ 117 PetscInt sizeredrhs; 118 PetscScalar *schur_sol; 119 PetscInt schur_sizesol; 120 121 PetscBool use_petsc_omp_support; 122 PetscOmpCtrl omp_ctrl; /* an OpenMP controler that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 123 MPI_Comm petsc_comm,omp_comm; /* petsc_comm is petsc matrix's comm */ 124 PetscMPIInt mpinz; /* on master rank, nz = sum(mpinz) over omp_comm; on other ranks, mpinz = nz*/ 125 PetscMPIInt omp_comm_size; 126 PetscBool is_omp_master; /* is this rank the master of omp_comm */ 127 PetscMPIInt *recvcount,*displs; 128 129 PetscErrorCode (*ConvertToTriples)(Mat, int, MatReuse, int*, int**, int**, PetscScalar**); 130 } Mat_MUMPS; 131 132 extern PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*); 133 134 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps) 135 { 136 PetscErrorCode ierr; 137 138 PetscFunctionBegin; 139 ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr); 140 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 141 ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr); 142 mumps->id.size_schur = 0; 143 mumps->id.schur_lld = 0; 144 mumps->id.ICNTL(19) = 0; 145 PetscFunctionReturn(0); 146 } 147 148 /* solve with rhs in mumps->id.redrhs and return in the same location */ 149 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 150 { 151 Mat_MUMPS *mumps=(Mat_MUMPS*)F->data; 152 Mat S,B,X; 153 MatFactorSchurStatus schurstatus; 154 PetscInt sizesol; 155 PetscErrorCode ierr; 156 157 PetscFunctionBegin; 158 ierr = MatFactorFactorizeSchurComplement(F);CHKERRQ(ierr); 159 ierr = MatFactorGetSchurComplement(F,&S,&schurstatus);CHKERRQ(ierr); 160 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);CHKERRQ(ierr); 161 ierr = MatSetType(B,((PetscObject)S)->type_name);CHKERRQ(ierr); 162 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 163 ierr = MatPinToCPU(B,S->pinnedtocpu);CHKERRQ(ierr); 164 #endif 165 switch (schurstatus) { 166 case MAT_FACTOR_SCHUR_FACTORED: 167 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);CHKERRQ(ierr); 168 ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr); 169 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 170 ierr = MatPinToCPU(X,S->pinnedtocpu);CHKERRQ(ierr); 171 #endif 172 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 173 ierr = MatMatSolveTranspose(S,B,X);CHKERRQ(ierr); 174 } else { 175 ierr = MatMatSolve(S,B,X);CHKERRQ(ierr); 176 } 177 break; 178 case MAT_FACTOR_SCHUR_INVERTED: 179 sizesol = mumps->id.nrhs*mumps->id.size_schur; 180 if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 181 ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr); 182 ierr = PetscMalloc1(sizesol,&mumps->schur_sol);CHKERRQ(ierr); 183 mumps->schur_sizesol = sizesol; 184 } 185 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);CHKERRQ(ierr); 186 ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr); 187 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 188 ierr = MatPinToCPU(X,S->pinnedtocpu);CHKERRQ(ierr); 189 #endif 190 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 191 ierr = MatTransposeMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr); 192 } else { 193 ierr = MatMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr); 194 } 195 ierr = MatCopy(X,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 196 break; 197 default: 198 SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %D",F->schur_status); 199 break; 200 } 201 ierr = MatFactorRestoreSchurComplement(F,&S,schurstatus);CHKERRQ(ierr); 202 ierr = MatDestroy(&B);CHKERRQ(ierr); 203 ierr = MatDestroy(&X);CHKERRQ(ierr); 204 PetscFunctionReturn(0); 205 } 206 207 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 208 { 209 Mat_MUMPS *mumps=(Mat_MUMPS*)F->data; 210 PetscErrorCode ierr; 211 212 PetscFunctionBegin; 213 if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 214 PetscFunctionReturn(0); 215 } 216 if (!expansion) { /* prepare for the condensation step */ 217 PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur; 218 /* allocate MUMPS internal array to store reduced right-hand sides */ 219 if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 220 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 221 mumps->id.lredrhs = mumps->id.size_schur; 222 ierr = PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);CHKERRQ(ierr); 223 mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs; 224 } 225 mumps->id.ICNTL(26) = 1; /* condensation phase */ 226 } else { /* prepare for the expansion step */ 227 /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */ 228 ierr = MatMumpsSolveSchur_Private(F);CHKERRQ(ierr); 229 mumps->id.ICNTL(26) = 2; /* expansion phase */ 230 PetscMUMPS_c(mumps); 231 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 232 /* restore defaults */ 233 mumps->id.ICNTL(26) = -1; 234 /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 235 if (mumps->id.nrhs > 1) { 236 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 237 mumps->id.lredrhs = 0; 238 mumps->sizeredrhs = 0; 239 } 240 } 241 PetscFunctionReturn(0); 242 } 243 244 /* 245 MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz] 246 247 input: 248 A - matrix in aij,baij or sbaij format 249 shift - 0: C style output triple; 1: Fortran style output triple. 250 reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 251 MAT_REUSE_MATRIX: only the values in v array are updated 252 output: 253 nnz - dim of r, c, and v (number of local nonzero entries of A) 254 r, c, v - row and col index, matrix values (matrix triples) 255 256 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 257 freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 258 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 259 260 */ 261 262 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 263 { 264 const PetscScalar *av; 265 const PetscInt *ai,*aj,*ajj,M=A->rmap->n; 266 PetscInt nz,rnz,i,j; 267 PetscErrorCode ierr; 268 PetscInt *row,*col; 269 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 270 271 PetscFunctionBegin; 272 ierr = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr); 273 *v = (PetscScalar*)av; 274 if (reuse == MAT_INITIAL_MATRIX) { 275 nz = aa->nz; 276 ai = aa->i; 277 aj = aa->j; 278 *nnz = nz; 279 ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr); 280 col = row + nz; 281 282 nz = 0; 283 for (i=0; i<M; i++) { 284 rnz = ai[i+1] - ai[i]; 285 ajj = aj + ai[i]; 286 for (j=0; j<rnz; j++) { 287 row[nz] = i+shift; col[nz++] = ajj[j] + shift; 288 } 289 } 290 *r = row; *c = col; 291 } 292 ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr); 293 PetscFunctionReturn(0); 294 } 295 296 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 297 { 298 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 299 PetscInt *ptr; 300 301 PetscFunctionBegin; 302 *v = a->val; 303 if (reuse == MAT_INITIAL_MATRIX) { 304 PetscInt nz,i,j,row; 305 PetscErrorCode ierr; 306 307 nz = a->sliidx[a->totalslices]; 308 *nnz = nz; 309 ierr = PetscMalloc1(2*nz, &ptr);CHKERRQ(ierr); 310 *r = ptr; 311 *c = ptr + nz; 312 313 for (i=0; i<a->totalslices; i++) { 314 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 315 *ptr++ = 8*i + row + shift; 316 } 317 } 318 for (i=0;i<nz;i++) *ptr++ = a->colidx[i] + shift; 319 } 320 PetscFunctionReturn(0); 321 } 322 323 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 324 { 325 Mat_SeqBAIJ *aa=(Mat_SeqBAIJ*)A->data; 326 const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2; 327 PetscInt bs,M,nz,idx=0,rnz,i,j,k,m; 328 PetscErrorCode ierr; 329 PetscInt *row,*col; 330 331 PetscFunctionBegin; 332 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 333 M = A->rmap->N/bs; 334 *v = aa->a; 335 if (reuse == MAT_INITIAL_MATRIX) { 336 ai = aa->i; aj = aa->j; 337 nz = bs2*aa->nz; 338 *nnz = nz; 339 ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr); 340 col = row + nz; 341 342 for (i=0; i<M; i++) { 343 ajj = aj + ai[i]; 344 rnz = ai[i+1] - ai[i]; 345 for (k=0; k<rnz; k++) { 346 for (j=0; j<bs; j++) { 347 for (m=0; m<bs; m++) { 348 row[idx] = i*bs + m + shift; 349 col[idx++] = bs*(ajj[k]) + j + shift; 350 } 351 } 352 } 353 } 354 *r = row; *c = col; 355 } 356 PetscFunctionReturn(0); 357 } 358 359 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r,int **c,PetscScalar **v) 360 { 361 const PetscInt *ai, *aj,*ajj; 362 PetscInt nz,rnz,i,j,k,m,bs; 363 PetscErrorCode ierr; 364 PetscInt *row,*col; 365 PetscScalar *val; 366 Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)A->data; 367 const PetscInt bs2=aa->bs2,mbs=aa->mbs; 368 #if defined(PETSC_USE_COMPLEX) 369 PetscBool hermitian; 370 #endif 371 372 PetscFunctionBegin; 373 #if defined(PETSC_USE_COMPLEX) 374 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 375 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 376 #endif 377 ai = aa->i; 378 aj = aa->j; 379 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 380 if (reuse == MAT_INITIAL_MATRIX) { 381 nz = aa->nz; 382 ierr = PetscMalloc((2*bs2*nz*sizeof(PetscInt)+(bs>1?bs2*nz*sizeof(PetscScalar):0)), &row);CHKERRQ(ierr); 383 col = row + bs2*nz; 384 if (bs>1) val = (PetscScalar*)(col + bs2*nz); 385 else val = aa->a; 386 387 *r = row; *c = col; *v = val; 388 } else { 389 if (bs == 1) *v = aa->a; 390 row = *r; col = *c; val = *v; 391 } 392 393 nz = 0; 394 if (bs>1) { 395 for (i=0; i<mbs; i++) { 396 rnz = ai[i+1] - ai[i]; 397 ajj = aj + ai[i]; 398 for (j=0; j<rnz; j++) { 399 for (k=0; k<bs; k++) { 400 for (m=0; m<bs; m++) { 401 if (ajj[j]>i || k>=m) { 402 if (reuse == MAT_INITIAL_MATRIX) { 403 row[nz] = i*bs + m + shift; 404 col[nz] = ajj[j]*bs + k + shift; 405 } 406 val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs]; 407 } 408 } 409 } 410 } 411 } 412 } else if (reuse == MAT_INITIAL_MATRIX) { 413 for (i=0; i<mbs; i++) { 414 rnz = ai[i+1] - ai[i]; 415 ajj = aj + ai[i]; 416 for (j=0; j<rnz; j++) { 417 row[nz] = i+shift; col[nz++] = ajj[j] + shift; 418 } 419 } 420 if (nz != aa->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %D != %D",nz,aa->nz); 421 } 422 if (reuse == MAT_INITIAL_MATRIX) *nnz = nz; 423 PetscFunctionReturn(0); 424 } 425 426 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 427 { 428 const PetscInt *ai,*aj,*ajj,*adiag,M=A->rmap->n; 429 PetscInt nz,rnz,i,j; 430 const PetscScalar *av,*v1; 431 PetscScalar *val; 432 PetscErrorCode ierr; 433 PetscInt *row,*col; 434 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 435 PetscBool missing; 436 #if defined(PETSC_USE_COMPLEX) 437 PetscBool hermitian; 438 #endif 439 440 PetscFunctionBegin; 441 #if defined(PETSC_USE_COMPLEX) 442 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 443 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 444 #endif 445 ierr = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr); 446 ai = aa->i; aj = aa->j; 447 adiag = aa->diag; 448 ierr = MatMissingDiagonal_SeqAIJ(A,&missing,&i);CHKERRQ(ierr); 449 if (reuse == MAT_INITIAL_MATRIX) { 450 /* count nz in the upper triangular part of A */ 451 nz = 0; 452 if (missing) { 453 for (i=0; i<M; i++) { 454 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 455 for (j=ai[i];j<ai[i+1];j++) { 456 if (aj[j] < i) continue; 457 nz++; 458 } 459 } else { 460 nz += ai[i+1] - adiag[i]; 461 } 462 } 463 } else { 464 for (i=0; i<M; i++) nz += ai[i+1] - adiag[i]; 465 } 466 *nnz = nz; 467 468 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 469 col = row + nz; 470 val = (PetscScalar*)(col + nz); 471 472 nz = 0; 473 if (missing) { 474 for (i=0; i<M; i++) { 475 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 476 for (j=ai[i];j<ai[i+1];j++) { 477 if (aj[j] < i) continue; 478 row[nz] = i+shift; 479 col[nz] = aj[j]+shift; 480 val[nz] = av[j]; 481 nz++; 482 } 483 } else { 484 rnz = ai[i+1] - adiag[i]; 485 ajj = aj + adiag[i]; 486 v1 = av + adiag[i]; 487 for (j=0; j<rnz; j++) { 488 row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j]; 489 } 490 } 491 } 492 } else { 493 for (i=0; i<M; i++) { 494 rnz = ai[i+1] - adiag[i]; 495 ajj = aj + adiag[i]; 496 v1 = av + adiag[i]; 497 for (j=0; j<rnz; j++) { 498 row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j]; 499 } 500 } 501 } 502 *r = row; *c = col; *v = val; 503 } else { 504 nz = 0; val = *v; 505 if (missing) { 506 for (i=0; i <M; i++) { 507 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 508 for (j=ai[i];j<ai[i+1];j++) { 509 if (aj[j] < i) continue; 510 val[nz++] = av[j]; 511 } 512 } else { 513 rnz = ai[i+1] - adiag[i]; 514 v1 = av + adiag[i]; 515 for (j=0; j<rnz; j++) { 516 val[nz++] = v1[j]; 517 } 518 } 519 } 520 } else { 521 for (i=0; i <M; i++) { 522 rnz = ai[i+1] - adiag[i]; 523 v1 = av + adiag[i]; 524 for (j=0; j<rnz; j++) { 525 val[nz++] = v1[j]; 526 } 527 } 528 } 529 } 530 ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr); 531 PetscFunctionReturn(0); 532 } 533 534 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r,int **c, PetscScalar **v) 535 { 536 const PetscInt *ai,*aj,*bi,*bj,*garray,*ajj,*bjj; 537 PetscErrorCode ierr; 538 PetscInt rstart,nz,bs,i,j,k,m,jj,irow,countA,countB; 539 PetscInt *row,*col; 540 const PetscScalar *av,*bv,*v1,*v2; 541 PetscScalar *val; 542 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data; 543 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)(mat->A)->data; 544 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 545 const PetscInt bs2=aa->bs2,mbs=aa->mbs; 546 #if defined(PETSC_USE_COMPLEX) 547 PetscBool hermitian; 548 #endif 549 550 PetscFunctionBegin; 551 #if defined(PETSC_USE_COMPLEX) 552 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 553 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 554 #endif 555 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 556 rstart = A->rmap->rstart; 557 ai = aa->i; 558 aj = aa->j; 559 bi = bb->i; 560 bj = bb->j; 561 av = aa->a; 562 bv = bb->a; 563 564 garray = mat->garray; 565 566 if (reuse == MAT_INITIAL_MATRIX) { 567 nz = aa->nz + bb->nz; 568 ierr = PetscMalloc((2*bs2*nz*sizeof(PetscInt)+bs2*nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 569 col = row + bs2*nz; 570 val = (PetscScalar*)(col + bs2*nz); 571 572 *r = row; *c = col; *v = val; 573 } else { 574 row = *r; col = *c; val = *v; 575 } 576 577 jj = 0; irow = rstart; 578 for (i=0; i<mbs; i++) { 579 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 580 countA = ai[i+1] - ai[i]; 581 countB = bi[i+1] - bi[i]; 582 bjj = bj + bi[i]; 583 v1 = av + ai[i]*bs2; 584 v2 = bv + bi[i]*bs2; 585 586 if (bs>1) { 587 /* A-part */ 588 for (j=0; j<countA; j++) { 589 for (k=0; k<bs; k++) { 590 for (m=0; m<bs; m++) { 591 if (rstart + ajj[j]*bs>irow || k>=m) { 592 if (reuse == MAT_INITIAL_MATRIX) { 593 row[jj] = irow + m + shift; col[jj] = rstart + ajj[j]*bs + k + shift; 594 } 595 val[jj++] = v1[j*bs2 + m + k*bs]; 596 } 597 } 598 } 599 } 600 601 /* B-part */ 602 for (j=0; j < countB; j++) { 603 for (k=0; k<bs; k++) { 604 for (m=0; m<bs; m++) { 605 if (reuse == MAT_INITIAL_MATRIX) { 606 row[jj] = irow + m + shift; col[jj] = garray[bjj[j]]*bs + k + shift; 607 } 608 val[jj++] = v2[j*bs2 + m + k*bs]; 609 } 610 } 611 } 612 } else { 613 /* A-part */ 614 for (j=0; j<countA; j++) { 615 if (reuse == MAT_INITIAL_MATRIX) { 616 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 617 } 618 val[jj++] = v1[j]; 619 } 620 621 /* B-part */ 622 for (j=0; j < countB; j++) { 623 if (reuse == MAT_INITIAL_MATRIX) { 624 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 625 } 626 val[jj++] = v2[j]; 627 } 628 } 629 irow+=bs; 630 } 631 *nnz = jj; 632 PetscFunctionReturn(0); 633 } 634 635 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 636 { 637 const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 638 PetscErrorCode ierr; 639 PetscInt rstart,nz,i,j,jj,irow,countA,countB; 640 PetscInt *row,*col; 641 const PetscScalar *av, *bv,*v1,*v2; 642 PetscScalar *val; 643 Mat Ad,Ao; 644 Mat_SeqAIJ *aa; 645 Mat_SeqAIJ *bb; 646 647 PetscFunctionBegin; 648 ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr); 649 ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr); 650 ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr); 651 652 aa = (Mat_SeqAIJ*)(Ad)->data; 653 bb = (Mat_SeqAIJ*)(Ao)->data; 654 ai = aa->i; 655 aj = aa->j; 656 bi = bb->i; 657 bj = bb->j; 658 659 rstart = A->rmap->rstart; 660 661 if (reuse == MAT_INITIAL_MATRIX) { 662 nz = aa->nz + bb->nz; 663 *nnz = nz; 664 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 665 col = row + nz; 666 val = (PetscScalar*)(col + nz); 667 668 *r = row; *c = col; *v = val; 669 } else { 670 row = *r; col = *c; val = *v; 671 } 672 673 jj = 0; irow = rstart; 674 for (i=0; i<m; i++) { 675 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 676 countA = ai[i+1] - ai[i]; 677 countB = bi[i+1] - bi[i]; 678 bjj = bj + bi[i]; 679 v1 = av + ai[i]; 680 v2 = bv + bi[i]; 681 682 /* A-part */ 683 for (j=0; j<countA; j++) { 684 if (reuse == MAT_INITIAL_MATRIX) { 685 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 686 } 687 val[jj++] = v1[j]; 688 } 689 690 /* B-part */ 691 for (j=0; j < countB; j++) { 692 if (reuse == MAT_INITIAL_MATRIX) { 693 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 694 } 695 val[jj++] = v2[j]; 696 } 697 irow++; 698 } 699 ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr); 700 ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr); 701 PetscFunctionReturn(0); 702 } 703 704 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 705 { 706 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)A->data; 707 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ*)(mat->A)->data; 708 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 709 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj; 710 const PetscInt *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart; 711 const PetscInt bs2=mat->bs2; 712 PetscErrorCode ierr; 713 PetscInt bs,nz,i,j,k,n,jj,irow,countA,countB,idx; 714 PetscInt *row,*col; 715 const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2; 716 PetscScalar *val; 717 718 PetscFunctionBegin; 719 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 720 if (reuse == MAT_INITIAL_MATRIX) { 721 nz = bs2*(aa->nz + bb->nz); 722 *nnz = nz; 723 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 724 col = row + nz; 725 val = (PetscScalar*)(col + nz); 726 727 *r = row; *c = col; *v = val; 728 } else { 729 row = *r; col = *c; val = *v; 730 } 731 732 jj = 0; irow = rstart; 733 for (i=0; i<mbs; i++) { 734 countA = ai[i+1] - ai[i]; 735 countB = bi[i+1] - bi[i]; 736 ajj = aj + ai[i]; 737 bjj = bj + bi[i]; 738 v1 = av + bs2*ai[i]; 739 v2 = bv + bs2*bi[i]; 740 741 idx = 0; 742 /* A-part */ 743 for (k=0; k<countA; k++) { 744 for (j=0; j<bs; j++) { 745 for (n=0; n<bs; n++) { 746 if (reuse == MAT_INITIAL_MATRIX) { 747 row[jj] = irow + n + shift; 748 col[jj] = rstart + bs*ajj[k] + j + shift; 749 } 750 val[jj++] = v1[idx++]; 751 } 752 } 753 } 754 755 idx = 0; 756 /* B-part */ 757 for (k=0; k<countB; k++) { 758 for (j=0; j<bs; j++) { 759 for (n=0; n<bs; n++) { 760 if (reuse == MAT_INITIAL_MATRIX) { 761 row[jj] = irow + n + shift; 762 col[jj] = bs*garray[bjj[k]] + j + shift; 763 } 764 val[jj++] = v2[idx++]; 765 } 766 } 767 } 768 irow += bs; 769 } 770 PetscFunctionReturn(0); 771 } 772 773 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 774 { 775 const PetscInt *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 776 PetscErrorCode ierr; 777 PetscInt rstart,nz,nza,nzb,i,j,jj,irow,countA,countB; 778 PetscInt *row,*col; 779 const PetscScalar *av, *bv,*v1,*v2; 780 PetscScalar *val; 781 Mat Ad,Ao; 782 Mat_SeqAIJ *aa; 783 Mat_SeqAIJ *bb; 784 #if defined(PETSC_USE_COMPLEX) 785 PetscBool hermitian; 786 #endif 787 788 PetscFunctionBegin; 789 #if defined(PETSC_USE_COMPLEX) 790 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 791 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 792 #endif 793 ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr); 794 ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr); 795 ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr); 796 797 aa = (Mat_SeqAIJ*)(Ad)->data; 798 bb = (Mat_SeqAIJ*)(Ao)->data; 799 ai = aa->i; 800 aj = aa->j; 801 adiag = aa->diag; 802 bi = bb->i; 803 bj = bb->j; 804 805 rstart = A->rmap->rstart; 806 807 if (reuse == MAT_INITIAL_MATRIX) { 808 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 809 nzb = 0; /* num of upper triangular entries in mat->B */ 810 for (i=0; i<m; i++) { 811 nza += (ai[i+1] - adiag[i]); 812 countB = bi[i+1] - bi[i]; 813 bjj = bj + bi[i]; 814 for (j=0; j<countB; j++) { 815 if (garray[bjj[j]] > rstart) nzb++; 816 } 817 } 818 819 nz = nza + nzb; /* total nz of upper triangular part of mat */ 820 *nnz = nz; 821 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 822 col = row + nz; 823 val = (PetscScalar*)(col + nz); 824 825 *r = row; *c = col; *v = val; 826 } else { 827 row = *r; col = *c; val = *v; 828 } 829 830 jj = 0; irow = rstart; 831 for (i=0; i<m; i++) { 832 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 833 v1 = av + adiag[i]; 834 countA = ai[i+1] - adiag[i]; 835 countB = bi[i+1] - bi[i]; 836 bjj = bj + bi[i]; 837 v2 = bv + bi[i]; 838 839 /* A-part */ 840 for (j=0; j<countA; j++) { 841 if (reuse == MAT_INITIAL_MATRIX) { 842 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 843 } 844 val[jj++] = v1[j]; 845 } 846 847 /* B-part */ 848 for (j=0; j < countB; j++) { 849 if (garray[bjj[j]] > rstart) { 850 if (reuse == MAT_INITIAL_MATRIX) { 851 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 852 } 853 val[jj++] = v2[j]; 854 } 855 } 856 irow++; 857 } 858 ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr); 859 ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr); 860 PetscFunctionReturn(0); 861 } 862 863 PetscErrorCode MatDestroy_MUMPS(Mat A) 864 { 865 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 866 PetscErrorCode ierr; 867 868 PetscFunctionBegin; 869 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 870 ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr); 871 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 872 ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr); 873 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 874 ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr); 875 ierr = PetscFree(mumps->irn);CHKERRQ(ierr); 876 ierr = PetscFree(mumps->info);CHKERRQ(ierr); 877 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 878 mumps->id.job = JOB_END; 879 PetscMUMPS_c(mumps); 880 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1)); 881 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 882 if (mumps->use_petsc_omp_support) { ierr = PetscOmpCtrlDestroy(&mumps->omp_ctrl);CHKERRQ(ierr); } 883 #endif 884 ierr = PetscFree2(mumps->recvcount,mumps->displs);CHKERRQ(ierr); 885 ierr = PetscFree(A->data);CHKERRQ(ierr); 886 887 /* clear composed functions */ 888 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr); 889 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr); 890 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr); 891 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr); 892 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);CHKERRQ(ierr); 893 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr); 894 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);CHKERRQ(ierr); 895 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);CHKERRQ(ierr); 896 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);CHKERRQ(ierr); 897 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);CHKERRQ(ierr); 898 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);CHKERRQ(ierr); 899 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);CHKERRQ(ierr); 900 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);CHKERRQ(ierr); 901 PetscFunctionReturn(0); 902 } 903 904 PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x) 905 { 906 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 907 PetscScalar *array; 908 Vec b_seq; 909 IS is_iden,is_petsc; 910 PetscErrorCode ierr; 911 PetscInt i; 912 PetscBool second_solve = PETSC_FALSE; 913 static PetscBool cite1 = PETSC_FALSE,cite2 = PETSC_FALSE; 914 915 PetscFunctionBegin; 916 ierr = PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1);CHKERRQ(ierr); 917 ierr = PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",&cite2);CHKERRQ(ierr); 918 919 if (A->factorerrortype) { 920 ierr = PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 921 ierr = VecSetInf(x);CHKERRQ(ierr); 922 PetscFunctionReturn(0); 923 } 924 925 mumps->id.ICNTL(20) = 0; /* dense RHS */ 926 mumps->id.nrhs = 1; 927 b_seq = mumps->b_seq; 928 if (mumps->petsc_size > 1) { 929 /* MUMPS only supports centralized rhs. Scatter b into a seqential rhs vector */ 930 ierr = VecScatterBegin(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 931 ierr = VecScatterEnd(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 932 if (!mumps->myid) {ierr = VecGetArray(b_seq,&array);CHKERRQ(ierr);} 933 } else { /* petsc_size == 1 */ 934 ierr = VecCopy(b,x);CHKERRQ(ierr); 935 ierr = VecGetArray(x,&array);CHKERRQ(ierr); 936 } 937 if (!mumps->myid) { /* define rhs on the host */ 938 mumps->id.nrhs = 1; 939 mumps->id.rhs = (MumpsScalar*)array; 940 } 941 942 /* 943 handle condensation step of Schur complement (if any) 944 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 945 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 946 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 947 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 948 */ 949 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 950 if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n"); 951 second_solve = PETSC_TRUE; 952 ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr); 953 } 954 /* solve phase */ 955 /*-------------*/ 956 mumps->id.job = JOB_SOLVE; 957 PetscMUMPS_c(mumps); 958 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 959 960 /* handle expansion step of Schur complement (if any) */ 961 if (second_solve) { 962 ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr); 963 } 964 965 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 966 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 967 /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 968 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 969 } 970 if (!mumps->scat_sol) { /* create scatter scat_sol */ 971 ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */ 972 for (i=0; i<mumps->id.lsol_loc; i++) { 973 mumps->id.isol_loc[i] -= 1; /* change Fortran style to C style */ 974 } 975 ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,mumps->id.isol_loc,PETSC_COPY_VALUES,&is_petsc);CHKERRQ(ierr); /* to */ 976 ierr = VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr); 977 ierr = ISDestroy(&is_iden);CHKERRQ(ierr); 978 ierr = ISDestroy(&is_petsc);CHKERRQ(ierr); 979 980 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 981 } 982 983 ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 984 ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 985 } 986 987 if (mumps->petsc_size > 1) {if (!mumps->myid) {ierr = VecRestoreArray(b_seq,&array);CHKERRQ(ierr);}} 988 else {ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);} 989 990 ierr = PetscLogFlops(2.0*mumps->id.RINFO(3));CHKERRQ(ierr); 991 PetscFunctionReturn(0); 992 } 993 994 PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x) 995 { 996 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 997 PetscErrorCode ierr; 998 999 PetscFunctionBegin; 1000 mumps->id.ICNTL(9) = 0; 1001 ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr); 1002 mumps->id.ICNTL(9) = 1; 1003 PetscFunctionReturn(0); 1004 } 1005 1006 PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X) 1007 { 1008 PetscErrorCode ierr; 1009 Mat Bt = NULL; 1010 PetscBool flg, flgT; 1011 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1012 PetscInt i,nrhs,M; 1013 PetscScalar *array; 1014 const PetscScalar *rbray; 1015 PetscInt lsol_loc,nlsol_loc,*isol_loc,*idxx,*isol_loc_save,iidx = 0; 1016 PetscScalar *bray,*sol_loc,*sol_loc_save; 1017 IS is_to,is_from; 1018 PetscInt k,proc,j,m,myrstart; 1019 const PetscInt *rstart; 1020 Vec v_mpi,b_seq,msol_loc; 1021 VecScatter scat_rhs,scat_sol; 1022 PetscScalar *aa; 1023 PetscInt spnr,*ia,*ja; 1024 Mat_MPIAIJ *b = NULL; 1025 1026 PetscFunctionBegin; 1027 ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 1028 if (!flg) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix"); 1029 1030 ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 1031 if (flg) { /* dense B */ 1032 if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution"); 1033 mumps->id.ICNTL(20)= 0; /* dense RHS */ 1034 } else { /* sparse B */ 1035 ierr = PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);CHKERRQ(ierr); 1036 if (flgT) { /* input B is transpose of actural RHS matrix, 1037 because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */ 1038 ierr = MatTransposeGetMat(B,&Bt);CHKERRQ(ierr); 1039 } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix"); 1040 mumps->id.ICNTL(20)= 1; /* sparse RHS */ 1041 } 1042 1043 ierr = MatGetSize(B,&M,&nrhs);CHKERRQ(ierr); 1044 mumps->id.nrhs = nrhs; 1045 mumps->id.lrhs = M; 1046 mumps->id.rhs = NULL; 1047 1048 if (mumps->petsc_size == 1) { 1049 PetscScalar *aa; 1050 PetscInt spnr,*ia,*ja; 1051 PetscBool second_solve = PETSC_FALSE; 1052 1053 ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr); 1054 mumps->id.rhs = (MumpsScalar*)array; 1055 1056 if (!Bt) { /* dense B */ 1057 /* copy B to X */ 1058 ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr); 1059 ierr = PetscArraycpy(array,rbray,M*nrhs);CHKERRQ(ierr); 1060 ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr); 1061 } else { /* sparse B */ 1062 ierr = MatSeqAIJGetArray(Bt,&aa);CHKERRQ(ierr); 1063 ierr = MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1064 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 1065 /* mumps requires ia and ja start at 1! */ 1066 mumps->id.irhs_ptr = ia; 1067 mumps->id.irhs_sparse = ja; 1068 mumps->id.nz_rhs = ia[spnr] - 1; 1069 mumps->id.rhs_sparse = (MumpsScalar*)aa; 1070 } 1071 /* handle condensation step of Schur complement (if any) */ 1072 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1073 second_solve = PETSC_TRUE; 1074 ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr); 1075 } 1076 /* solve phase */ 1077 /*-------------*/ 1078 mumps->id.job = JOB_SOLVE; 1079 PetscMUMPS_c(mumps); 1080 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1081 1082 /* handle expansion step of Schur complement (if any) */ 1083 if (second_solve) { 1084 ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr); 1085 } 1086 if (Bt) { /* sparse B */ 1087 ierr = MatSeqAIJRestoreArray(Bt,&aa);CHKERRQ(ierr); 1088 ierr = MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1089 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 1090 } 1091 ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr); 1092 PetscFunctionReturn(0); 1093 } 1094 1095 /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/ 1096 if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n"); 1097 1098 /* create msol_loc to hold mumps local solution */ 1099 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1100 sol_loc_save = (PetscScalar*)mumps->id.sol_loc; 1101 1102 lsol_loc = mumps->id.lsol_loc; 1103 nlsol_loc = nrhs*lsol_loc; /* length of sol_loc */ 1104 ierr = PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);CHKERRQ(ierr); 1105 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1106 mumps->id.isol_loc = isol_loc; 1107 1108 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);CHKERRQ(ierr); 1109 1110 if (!Bt) { /* dense B */ 1111 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1112 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1113 0, re-arrange B into desired order, which is a local operation. 1114 */ 1115 1116 /* scatter v_mpi to b_seq because MUMPS only supports centralized rhs */ 1117 /* wrap dense rhs matrix B into a vector v_mpi */ 1118 ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr); 1119 ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr); 1120 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr); 1121 ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr); 1122 1123 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */ 1124 if (!mumps->myid) { 1125 PetscInt *idx; 1126 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1127 ierr = PetscMalloc1(nrhs*M,&idx);CHKERRQ(ierr); 1128 ierr = MatGetOwnershipRanges(B,&rstart);CHKERRQ(ierr); 1129 k = 0; 1130 for (proc=0; proc<mumps->petsc_size; proc++){ 1131 for (j=0; j<nrhs; j++){ 1132 for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i; 1133 } 1134 } 1135 1136 ierr = VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);CHKERRQ(ierr); 1137 ierr = ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr); 1138 ierr = ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);CHKERRQ(ierr); 1139 } else { 1140 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);CHKERRQ(ierr); 1141 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);CHKERRQ(ierr); 1142 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);CHKERRQ(ierr); 1143 } 1144 ierr = VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);CHKERRQ(ierr); 1145 ierr = VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1146 ierr = ISDestroy(&is_to);CHKERRQ(ierr); 1147 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1148 ierr = VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1149 1150 if (!mumps->myid) { /* define rhs on the host */ 1151 ierr = VecGetArray(b_seq,&bray);CHKERRQ(ierr); 1152 mumps->id.rhs = (MumpsScalar*)bray; 1153 ierr = VecRestoreArray(b_seq,&bray);CHKERRQ(ierr); 1154 } 1155 1156 } else { /* sparse B */ 1157 b = (Mat_MPIAIJ*)Bt->data; 1158 1159 /* wrap dense X into a vector v_mpi */ 1160 ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr); 1161 ierr = MatDenseGetArray(X,&bray);CHKERRQ(ierr); 1162 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr); 1163 ierr = MatDenseRestoreArray(X,&bray);CHKERRQ(ierr); 1164 1165 if (!mumps->myid) { 1166 ierr = MatSeqAIJGetArray(b->A,&aa);CHKERRQ(ierr); 1167 ierr = MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1168 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 1169 /* mumps requires ia and ja start at 1! */ 1170 mumps->id.irhs_ptr = ia; 1171 mumps->id.irhs_sparse = ja; 1172 mumps->id.nz_rhs = ia[spnr] - 1; 1173 mumps->id.rhs_sparse = (MumpsScalar*)aa; 1174 } else { 1175 mumps->id.irhs_ptr = NULL; 1176 mumps->id.irhs_sparse = NULL; 1177 mumps->id.nz_rhs = 0; 1178 mumps->id.rhs_sparse = NULL; 1179 } 1180 } 1181 1182 /* solve phase */ 1183 /*-------------*/ 1184 mumps->id.job = JOB_SOLVE; 1185 PetscMUMPS_c(mumps); 1186 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1187 1188 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1189 ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr); 1190 ierr = VecPlaceArray(v_mpi,array);CHKERRQ(ierr); 1191 1192 /* create scatter scat_sol */ 1193 ierr = MatGetOwnershipRanges(X,&rstart);CHKERRQ(ierr); 1194 /* iidx: index for scatter mumps solution to petsc X */ 1195 1196 ierr = ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);CHKERRQ(ierr); 1197 ierr = PetscMalloc1(nlsol_loc,&idxx);CHKERRQ(ierr); 1198 for (i=0; i<lsol_loc; i++) { 1199 isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1200 1201 for (proc=0; proc<mumps->petsc_size; proc++){ 1202 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) { 1203 myrstart = rstart[proc]; 1204 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1205 iidx = k + myrstart*nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1206 m = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */ 1207 break; 1208 } 1209 } 1210 1211 for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m; 1212 } 1213 ierr = ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr); 1214 ierr = VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);CHKERRQ(ierr); 1215 ierr = VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1216 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1217 ierr = ISDestroy(&is_to);CHKERRQ(ierr); 1218 ierr = VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1219 ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr); 1220 1221 /* free spaces */ 1222 mumps->id.sol_loc = (MumpsScalar*)sol_loc_save; 1223 mumps->id.isol_loc = isol_loc_save; 1224 1225 ierr = PetscFree2(sol_loc,isol_loc);CHKERRQ(ierr); 1226 ierr = PetscFree(idxx);CHKERRQ(ierr); 1227 ierr = VecDestroy(&msol_loc);CHKERRQ(ierr); 1228 ierr = VecDestroy(&v_mpi);CHKERRQ(ierr); 1229 if (Bt) { 1230 if (!mumps->myid) { 1231 b = (Mat_MPIAIJ*)Bt->data; 1232 ierr = MatSeqAIJRestoreArray(b->A,&aa);CHKERRQ(ierr); 1233 ierr = MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1234 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 1235 } 1236 } else { 1237 ierr = VecDestroy(&b_seq);CHKERRQ(ierr); 1238 ierr = VecScatterDestroy(&scat_rhs);CHKERRQ(ierr); 1239 } 1240 ierr = VecScatterDestroy(&scat_sol);CHKERRQ(ierr); 1241 ierr = PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));CHKERRQ(ierr); 1242 PetscFunctionReturn(0); 1243 } 1244 1245 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X) 1246 { 1247 PetscErrorCode ierr; 1248 PetscBool flg; 1249 Mat B; 1250 1251 PetscFunctionBegin; 1252 ierr = PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 1253 if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix"); 1254 1255 /* Create B=Bt^T that uses Bt's data structure */ 1256 ierr = MatCreateTranspose(Bt,&B);CHKERRQ(ierr); 1257 1258 ierr = MatMatSolve_MUMPS(A,B,X);CHKERRQ(ierr); 1259 ierr = MatDestroy(&B);CHKERRQ(ierr); 1260 PetscFunctionReturn(0); 1261 } 1262 1263 #if !defined(PETSC_USE_COMPLEX) 1264 /* 1265 input: 1266 F: numeric factor 1267 output: 1268 nneg: total number of negative pivots 1269 nzero: total number of zero pivots 1270 npos: (global dimension of F) - nneg - nzero 1271 */ 1272 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos) 1273 { 1274 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1275 PetscErrorCode ierr; 1276 PetscMPIInt size; 1277 1278 PetscFunctionBegin; 1279 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRQ(ierr); 1280 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1281 if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",mumps->id.INFOG(13)); 1282 1283 if (nneg) *nneg = mumps->id.INFOG(12); 1284 if (nzero || npos) { 1285 if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1286 if (nzero) *nzero = mumps->id.INFOG(28); 1287 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1288 } 1289 PetscFunctionReturn(0); 1290 } 1291 #endif 1292 1293 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps) 1294 { 1295 PetscErrorCode ierr; 1296 PetscInt i,nz=0,*irn,*jcn=0; 1297 PetscScalar *val=0; 1298 PetscMPIInt mpinz,*recvcount=NULL,*displs=NULL; 1299 1300 PetscFunctionBegin; 1301 if (mumps->omp_comm_size > 1) { 1302 if (reuse == MAT_INITIAL_MATRIX) { 1303 /* master first gathers counts of nonzeros to receive */ 1304 if (mumps->is_omp_master) { ierr = PetscMalloc2(mumps->omp_comm_size,&recvcount,mumps->omp_comm_size,&displs);CHKERRQ(ierr); } 1305 ierr = PetscMPIIntCast(mumps->nz,&mpinz);CHKERRQ(ierr); 1306 ierr = MPI_Gather(&mpinz,1,MPI_INT,recvcount,1,MPI_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1307 1308 /* master allocates memory to receive nonzeros */ 1309 if (mumps->is_omp_master) { 1310 displs[0] = 0; 1311 for (i=1; i<mumps->omp_comm_size; i++) displs[i] = displs[i-1] + recvcount[i-1]; 1312 nz = displs[mumps->omp_comm_size-1] + recvcount[mumps->omp_comm_size-1]; 1313 ierr = PetscMalloc(2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar),&irn);CHKERRQ(ierr); 1314 jcn = irn + nz; 1315 val = (PetscScalar*)(jcn + nz); 1316 } 1317 1318 /* save the gatherv plan */ 1319 mumps->mpinz = mpinz; /* used as send count */ 1320 mumps->recvcount = recvcount; 1321 mumps->displs = displs; 1322 1323 /* master gathers nonzeros */ 1324 ierr = MPI_Gatherv(mumps->irn,mpinz,MPIU_INT,irn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1325 ierr = MPI_Gatherv(mumps->jcn,mpinz,MPIU_INT,jcn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1326 ierr = MPI_Gatherv(mumps->val,mpinz,MPIU_SCALAR,val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1327 1328 /* master frees its row/col/val and replaces them with bigger arrays */ 1329 if (mumps->is_omp_master) { 1330 ierr = PetscFree(mumps->irn);CHKERRQ(ierr); /* irn/jcn/val are allocated together so free only irn */ 1331 mumps->nz = nz; /* it is a sum of mpinz over omp_comm */ 1332 mumps->irn = irn; 1333 mumps->jcn = jcn; 1334 mumps->val = val; 1335 } 1336 } else { 1337 ierr = MPI_Gatherv((mumps->is_omp_master?MPI_IN_PLACE:mumps->val),mumps->mpinz,MPIU_SCALAR,mumps->val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1338 } 1339 } 1340 PetscFunctionReturn(0); 1341 } 1342 1343 PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info) 1344 { 1345 Mat_MUMPS *mumps =(Mat_MUMPS*)(F)->data; 1346 PetscErrorCode ierr; 1347 PetscBool isMPIAIJ; 1348 1349 PetscFunctionBegin; 1350 if (mumps->id.INFOG(1) < 0) { 1351 if (mumps->id.INFOG(1) == -6) { 1352 ierr = PetscInfo2(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1353 } 1354 ierr = PetscInfo2(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1355 PetscFunctionReturn(0); 1356 } 1357 1358 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1359 ierr = MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);CHKERRQ(ierr); 1360 1361 /* numerical factorization phase */ 1362 /*-------------------------------*/ 1363 mumps->id.job = JOB_FACTNUMERIC; 1364 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1365 if (!mumps->myid) { 1366 mumps->id.a = (MumpsScalar*)mumps->val; 1367 } 1368 } else { 1369 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1370 } 1371 PetscMUMPS_c(mumps); 1372 if (mumps->id.INFOG(1) < 0) { 1373 if (A->erroriffailure) { 1374 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 1375 } else { 1376 if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */ 1377 ierr = PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1378 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1379 } else if (mumps->id.INFOG(1) == -13) { 1380 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1381 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1382 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10) ) { 1383 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray \n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1384 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1385 } else { 1386 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1387 F->factorerrortype = MAT_FACTOR_OTHER; 1388 } 1389 } 1390 } 1391 if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d\n",mumps->id.INFOG(16)); 1392 1393 F->assembled = PETSC_TRUE; 1394 mumps->matstruc = SAME_NONZERO_PATTERN; 1395 if (F->schur) { /* reset Schur status to unfactored */ 1396 #if defined(PETSC_HAVE_CUDA) 1397 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 1398 #endif 1399 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1400 mumps->id.ICNTL(19) = 2; 1401 ierr = MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);CHKERRQ(ierr); 1402 } 1403 ierr = MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);CHKERRQ(ierr); 1404 } 1405 1406 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 1407 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 1408 1409 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 1410 if (mumps->petsc_size > 1) { 1411 PetscInt lsol_loc; 1412 PetscScalar *sol_loc; 1413 1414 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr); 1415 1416 /* distributed solution; Create x_seq=sol_loc for repeated use */ 1417 if (mumps->x_seq) { 1418 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 1419 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 1420 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 1421 } 1422 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1423 ierr = PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);CHKERRQ(ierr); 1424 mumps->id.lsol_loc = lsol_loc; 1425 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1426 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr); 1427 } 1428 ierr = PetscLogFlops(mumps->id.RINFO(2));CHKERRQ(ierr); 1429 PetscFunctionReturn(0); 1430 } 1431 1432 /* Sets MUMPS options from the options database */ 1433 PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A) 1434 { 1435 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1436 PetscErrorCode ierr; 1437 PetscInt icntl,info[80],i,ninfo=80; 1438 PetscBool flg; 1439 1440 PetscFunctionBegin; 1441 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 1442 ierr = PetscOptionsInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr); 1443 if (flg) mumps->id.ICNTL(1) = icntl; 1444 ierr = PetscOptionsInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr); 1445 if (flg) mumps->id.ICNTL(2) = icntl; 1446 ierr = PetscOptionsInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr); 1447 if (flg) mumps->id.ICNTL(3) = icntl; 1448 1449 ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr); 1450 if (flg) mumps->id.ICNTL(4) = icntl; 1451 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 1452 1453 ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr); 1454 if (flg) mumps->id.ICNTL(6) = icntl; 1455 1456 ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr); 1457 if (flg) { 1458 if (icntl== 1 && mumps->petsc_size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n"); 1459 else mumps->id.ICNTL(7) = icntl; 1460 } 1461 1462 ierr = PetscOptionsInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr); 1463 /* ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL);CHKERRQ(ierr); handled by MatSolveTranspose_MUMPS() */ 1464 ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr); 1465 ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr); 1466 ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr); 1467 ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr); 1468 ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr); 1469 ierr = PetscOptionsInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr); 1470 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 1471 ierr = MatDestroy(&F->schur);CHKERRQ(ierr); 1472 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 1473 } 1474 /* ierr = PetscOptionsInt("-mat_mumps_icntl_20","ICNTL(20): the format (dense or sparse) of the right-hand sides","None",mumps->id.ICNTL(20),&mumps->id.ICNTL(20),NULL);CHKERRQ(ierr); -- sparse rhs is not supported in PETSc API */ 1475 /* ierr = PetscOptionsInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL);CHKERRQ(ierr); we only use distributed solution vector */ 1476 1477 ierr = PetscOptionsInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr); 1478 ierr = PetscOptionsInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr); 1479 ierr = PetscOptionsInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr); 1480 if (mumps->id.ICNTL(24)) { 1481 mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ 1482 } 1483 1484 ierr = PetscOptionsInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr); 1485 ierr = PetscOptionsInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr); 1486 ierr = PetscOptionsInt("-mat_mumps_icntl_27","ICNTL(27): the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr); 1487 ierr = PetscOptionsInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr); 1488 ierr = PetscOptionsInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr); 1489 /* ierr = PetscOptionsInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */ 1490 ierr = PetscOptionsInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr); 1491 /* ierr = PetscOptionsInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr); -- not supported by PETSc API */ 1492 ierr = PetscOptionsInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr); 1493 ierr = PetscOptionsInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr); 1494 ierr = PetscOptionsInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);CHKERRQ(ierr); 1495 ierr = PetscOptionsInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);CHKERRQ(ierr); 1496 1497 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr); 1498 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr); 1499 ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr); 1500 ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr); 1501 ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr); 1502 ierr = PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);CHKERRQ(ierr); 1503 1504 ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, 256, NULL);CHKERRQ(ierr); 1505 1506 ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr); 1507 if (ninfo) { 1508 if (ninfo > 80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %d must <= 80\n",ninfo); 1509 ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr); 1510 mumps->ninfo = ninfo; 1511 for (i=0; i<ninfo; i++) { 1512 if (info[i] < 0 || info[i]>80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %d must between 1 and 80\n",ninfo); 1513 else mumps->info[i] = info[i]; 1514 } 1515 } 1516 1517 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1518 PetscFunctionReturn(0); 1519 } 1520 1521 PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps) 1522 { 1523 PetscErrorCode ierr; 1524 PetscInt nthreads=0; 1525 1526 PetscFunctionBegin; 1527 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1528 ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRQ(ierr); 1529 ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRQ(ierr); /* so that code like "if (!myid)" still works even if mumps_comm is different */ 1530 1531 ierr = PetscOptionsHasName(NULL,NULL,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr); 1532 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1533 ierr = PetscOptionsGetInt(NULL,NULL,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr); 1534 if (mumps->use_petsc_omp_support) { 1535 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1536 ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr); 1537 ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr); 1538 #else 1539 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -mat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual\n"); 1540 #endif 1541 } else { 1542 mumps->omp_comm = PETSC_COMM_SELF; 1543 mumps->mumps_comm = mumps->petsc_comm; 1544 mumps->is_omp_master = PETSC_TRUE; 1545 } 1546 ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRQ(ierr); 1547 1548 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 1549 mumps->id.job = JOB_INIT; 1550 mumps->id.par = 1; /* host participates factorizaton and solve */ 1551 mumps->id.sym = mumps->sym; 1552 1553 PetscMUMPS_c(mumps); 1554 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in PetscInitializeMUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1555 1556 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 1557 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 1558 */ 1559 ierr = MPI_Bcast(mumps->id.icntl,40,MPI_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 Manual Section 9 */ 1560 ierr = MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); 1561 1562 mumps->scat_rhs = NULL; 1563 mumps->scat_sol = NULL; 1564 1565 /* set PETSc-MUMPS default options - override MUMPS default */ 1566 mumps->id.ICNTL(3) = 0; 1567 mumps->id.ICNTL(4) = 0; 1568 if (mumps->petsc_size == 1) { 1569 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 1570 } else { 1571 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 1572 mumps->id.ICNTL(20) = 0; /* rhs is in dense format */ 1573 mumps->id.ICNTL(21) = 1; /* distributed solution */ 1574 } 1575 1576 /* schur */ 1577 mumps->id.size_schur = 0; 1578 mumps->id.listvar_schur = NULL; 1579 mumps->id.schur = NULL; 1580 mumps->sizeredrhs = 0; 1581 mumps->schur_sol = NULL; 1582 mumps->schur_sizesol = 0; 1583 PetscFunctionReturn(0); 1584 } 1585 1586 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps) 1587 { 1588 PetscErrorCode ierr; 1589 1590 PetscFunctionBegin; 1591 if (mumps->id.INFOG(1) < 0) { 1592 if (A->erroriffailure) { 1593 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1594 } else { 1595 if (mumps->id.INFOG(1) == -6) { 1596 ierr = PetscInfo2(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1597 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 1598 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 1599 ierr = PetscInfo2(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1600 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1601 } else { 1602 ierr = PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1603 F->factorerrortype = MAT_FACTOR_OTHER; 1604 } 1605 } 1606 } 1607 PetscFunctionReturn(0); 1608 } 1609 1610 /* Note Petsc r(=c) permutation is used when mumps->id.ICNTL(7)==1 with centralized assembled matrix input; otherwise r and c are ignored */ 1611 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1612 { 1613 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1614 PetscErrorCode ierr; 1615 Vec b; 1616 const PetscInt M = A->rmap->N; 1617 1618 PetscFunctionBegin; 1619 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1620 1621 /* Set MUMPS options from the options database */ 1622 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1623 1624 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1625 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1626 1627 /* analysis phase */ 1628 /*----------------*/ 1629 mumps->id.job = JOB_FACTSYMBOLIC; 1630 mumps->id.n = M; 1631 switch (mumps->id.ICNTL(18)) { 1632 case 0: /* centralized assembled matrix input */ 1633 if (!mumps->myid) { 1634 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1635 if (mumps->id.ICNTL(6)>1) { 1636 mumps->id.a = (MumpsScalar*)mumps->val; 1637 } 1638 if (mumps->id.ICNTL(7) == 1) { /* use user-provide matrix ordering - assuming r = c ordering */ 1639 /* 1640 PetscBool flag; 1641 ierr = ISEqual(r,c,&flag);CHKERRQ(ierr); 1642 if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"row_perm != col_perm"); 1643 ierr = ISView(r,PETSC_VIEWER_STDOUT_SELF); 1644 */ 1645 if (!mumps->myid) { 1646 const PetscInt *idx; 1647 PetscInt i,*perm_in; 1648 1649 ierr = PetscMalloc1(M,&perm_in);CHKERRQ(ierr); 1650 ierr = ISGetIndices(r,&idx);CHKERRQ(ierr); 1651 1652 mumps->id.perm_in = perm_in; 1653 for (i=0; i<M; i++) perm_in[i] = idx[i]+1; /* perm_in[]: start from 1, not 0! */ 1654 ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr); 1655 } 1656 } 1657 } 1658 break; 1659 case 3: /* distributed assembled matrix input (size>1) */ 1660 mumps->id.nz_loc = mumps->nz; 1661 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1662 if (mumps->id.ICNTL(6)>1) { 1663 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1664 } 1665 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1666 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1667 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1668 ierr = VecDestroy(&b);CHKERRQ(ierr); 1669 break; 1670 } 1671 PetscMUMPS_c(mumps); 1672 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1673 1674 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1675 F->ops->solve = MatSolve_MUMPS; 1676 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1677 F->ops->matsolve = MatMatSolve_MUMPS; 1678 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1679 PetscFunctionReturn(0); 1680 } 1681 1682 /* Note the Petsc r and c permutations are ignored */ 1683 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1684 { 1685 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1686 PetscErrorCode ierr; 1687 Vec b; 1688 const PetscInt M = A->rmap->N; 1689 1690 PetscFunctionBegin; 1691 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1692 1693 /* Set MUMPS options from the options database */ 1694 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1695 1696 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1697 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1698 1699 /* analysis phase */ 1700 /*----------------*/ 1701 mumps->id.job = JOB_FACTSYMBOLIC; 1702 mumps->id.n = M; 1703 switch (mumps->id.ICNTL(18)) { 1704 case 0: /* centralized assembled matrix input */ 1705 if (!mumps->myid) { 1706 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1707 if (mumps->id.ICNTL(6)>1) { 1708 mumps->id.a = (MumpsScalar*)mumps->val; 1709 } 1710 } 1711 break; 1712 case 3: /* distributed assembled matrix input (size>1) */ 1713 mumps->id.nz_loc = mumps->nz; 1714 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1715 if (mumps->id.ICNTL(6)>1) { 1716 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1717 } 1718 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1719 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1720 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1721 ierr = VecDestroy(&b);CHKERRQ(ierr); 1722 break; 1723 } 1724 PetscMUMPS_c(mumps); 1725 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1726 1727 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1728 F->ops->solve = MatSolve_MUMPS; 1729 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1730 PetscFunctionReturn(0); 1731 } 1732 1733 /* Note the Petsc r permutation and factor info are ignored */ 1734 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info) 1735 { 1736 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1737 PetscErrorCode ierr; 1738 Vec b; 1739 const PetscInt M = A->rmap->N; 1740 1741 PetscFunctionBegin; 1742 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1743 1744 /* Set MUMPS options from the options database */ 1745 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1746 1747 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1748 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1749 1750 /* analysis phase */ 1751 /*----------------*/ 1752 mumps->id.job = JOB_FACTSYMBOLIC; 1753 mumps->id.n = M; 1754 switch (mumps->id.ICNTL(18)) { 1755 case 0: /* centralized assembled matrix input */ 1756 if (!mumps->myid) { 1757 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1758 if (mumps->id.ICNTL(6)>1) { 1759 mumps->id.a = (MumpsScalar*)mumps->val; 1760 } 1761 } 1762 break; 1763 case 3: /* distributed assembled matrix input (size>1) */ 1764 mumps->id.nz_loc = mumps->nz; 1765 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1766 if (mumps->id.ICNTL(6)>1) { 1767 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1768 } 1769 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1770 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1771 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1772 ierr = VecDestroy(&b);CHKERRQ(ierr); 1773 break; 1774 } 1775 PetscMUMPS_c(mumps); 1776 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1777 1778 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 1779 F->ops->solve = MatSolve_MUMPS; 1780 F->ops->solvetranspose = MatSolve_MUMPS; 1781 F->ops->matsolve = MatMatSolve_MUMPS; 1782 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1783 #if defined(PETSC_USE_COMPLEX) 1784 F->ops->getinertia = NULL; 1785 #else 1786 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 1787 #endif 1788 PetscFunctionReturn(0); 1789 } 1790 1791 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) 1792 { 1793 PetscErrorCode ierr; 1794 PetscBool iascii; 1795 PetscViewerFormat format; 1796 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1797 1798 PetscFunctionBegin; 1799 /* check if matrix is mumps type */ 1800 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0); 1801 1802 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1803 if (iascii) { 1804 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1805 if (format == PETSC_VIEWER_ASCII_INFO) { 1806 ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr); 1807 ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",mumps->id.sym);CHKERRQ(ierr); 1808 ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",mumps->id.par);CHKERRQ(ierr); 1809 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d \n",mumps->id.ICNTL(1));CHKERRQ(ierr); 1810 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d \n",mumps->id.ICNTL(2));CHKERRQ(ierr); 1811 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d \n",mumps->id.ICNTL(3));CHKERRQ(ierr); 1812 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",mumps->id.ICNTL(4));CHKERRQ(ierr); 1813 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",mumps->id.ICNTL(5));CHKERRQ(ierr); 1814 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",mumps->id.ICNTL(6));CHKERRQ(ierr); 1815 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d \n",mumps->id.ICNTL(7));CHKERRQ(ierr); 1816 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d \n",mumps->id.ICNTL(8));CHKERRQ(ierr); 1817 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",mumps->id.ICNTL(10));CHKERRQ(ierr); 1818 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",mumps->id.ICNTL(11));CHKERRQ(ierr); 1819 if (mumps->id.ICNTL(11)>0) { 1820 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr); 1821 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr); 1822 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6));CHKERRQ(ierr); 1823 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr); 1824 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr); 1825 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr); 1826 } 1827 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",mumps->id.ICNTL(12));CHKERRQ(ierr); 1828 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (efficiency control): %d \n",mumps->id.ICNTL(13));CHKERRQ(ierr); 1829 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",mumps->id.ICNTL(14));CHKERRQ(ierr); 1830 /* ICNTL(15-17) not used */ 1831 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",mumps->id.ICNTL(18));CHKERRQ(ierr); 1832 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d \n",mumps->id.ICNTL(19));CHKERRQ(ierr); 1833 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(20) (rhs sparse pattern): %d \n",mumps->id.ICNTL(20));CHKERRQ(ierr); 1834 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d \n",mumps->id.ICNTL(21));CHKERRQ(ierr); 1835 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d \n",mumps->id.ICNTL(22));CHKERRQ(ierr); 1836 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory can be allocated locally):%d \n",mumps->id.ICNTL(23));CHKERRQ(ierr); 1837 1838 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d \n",mumps->id.ICNTL(24));CHKERRQ(ierr); 1839 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d \n",mumps->id.ICNTL(25));CHKERRQ(ierr); 1840 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for rhs or solution): %d \n",mumps->id.ICNTL(26));CHKERRQ(ierr); 1841 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(27) (experimental parameter): %d \n",mumps->id.ICNTL(27));CHKERRQ(ierr); 1842 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d \n",mumps->id.ICNTL(28));CHKERRQ(ierr); 1843 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d \n",mumps->id.ICNTL(29));CHKERRQ(ierr); 1844 1845 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d \n",mumps->id.ICNTL(30));CHKERRQ(ierr); 1846 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(31) (factors is discarded in the solve phase): %d \n",mumps->id.ICNTL(31));CHKERRQ(ierr); 1847 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d \n",mumps->id.ICNTL(33));CHKERRQ(ierr); 1848 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d \n",mumps->id.ICNTL(35));CHKERRQ(ierr); 1849 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(36) (choice of BLR factorization variant): %d \n",mumps->id.ICNTL(36));CHKERRQ(ierr); 1850 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(38) (estimated compression rate of LU factors): %d \n",mumps->id.ICNTL(38));CHKERRQ(ierr); 1851 1852 ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1));CHKERRQ(ierr); 1853 ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr); 1854 ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3));CHKERRQ(ierr); 1855 ierr = PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4));CHKERRQ(ierr); 1856 ierr = PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5));CHKERRQ(ierr); 1857 ierr = PetscViewerASCIIPrintf(viewer," CNTL(7) (dropping parameter for BLR): %g \n",mumps->id.CNTL(7));CHKERRQ(ierr); 1858 1859 /* infomation local to each processor */ 1860 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr); 1861 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1862 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr); 1863 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1864 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr); 1865 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr); 1866 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1867 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr); 1868 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr); 1869 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1870 1871 ierr = PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr); 1872 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr); 1873 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1874 1875 ierr = PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr); 1876 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr); 1877 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1878 1879 ierr = PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr); 1880 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr); 1881 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1882 1883 if (mumps->ninfo && mumps->ninfo <= 80){ 1884 PetscInt i; 1885 for (i=0; i<mumps->ninfo; i++){ 1886 ierr = PetscViewerASCIIPrintf(viewer, " INFO(%d): \n",mumps->info[i]);CHKERRQ(ierr); 1887 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(mumps->info[i]));CHKERRQ(ierr); 1888 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1889 } 1890 } 1891 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1892 1893 if (!mumps->myid) { /* information from the host */ 1894 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr); 1895 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr); 1896 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr); 1897 ierr = PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr); 1898 1899 ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(3));CHKERRQ(ierr); 1900 ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(4));CHKERRQ(ierr); 1901 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",mumps->id.INFOG(5));CHKERRQ(ierr); 1902 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",mumps->id.INFOG(6));CHKERRQ(ierr); 1903 ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively use after analysis): %d \n",mumps->id.INFOG(7));CHKERRQ(ierr); 1904 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",mumps->id.INFOG(8));CHKERRQ(ierr); 1905 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",mumps->id.INFOG(9));CHKERRQ(ierr); 1906 ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d \n",mumps->id.INFOG(10));CHKERRQ(ierr); 1907 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d \n",mumps->id.INFOG(11));CHKERRQ(ierr); 1908 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",mumps->id.INFOG(12));CHKERRQ(ierr); 1909 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",mumps->id.INFOG(13));CHKERRQ(ierr); 1910 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",mumps->id.INFOG(14));CHKERRQ(ierr); 1911 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",mumps->id.INFOG(15));CHKERRQ(ierr); 1912 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",mumps->id.INFOG(16));CHKERRQ(ierr); 1913 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",mumps->id.INFOG(17));CHKERRQ(ierr); 1914 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",mumps->id.INFOG(18));CHKERRQ(ierr); 1915 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",mumps->id.INFOG(19));CHKERRQ(ierr); 1916 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",mumps->id.INFOG(20));CHKERRQ(ierr); 1917 ierr = PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",mumps->id.INFOG(21));CHKERRQ(ierr); 1918 ierr = PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",mumps->id.INFOG(22));CHKERRQ(ierr); 1919 ierr = PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",mumps->id.INFOG(23));CHKERRQ(ierr); 1920 ierr = PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",mumps->id.INFOG(24));CHKERRQ(ierr); 1921 ierr = PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",mumps->id.INFOG(25));CHKERRQ(ierr); 1922 ierr = PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));CHKERRQ(ierr); 1923 ierr = PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));CHKERRQ(ierr); 1924 ierr = PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));CHKERRQ(ierr); 1925 ierr = PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));CHKERRQ(ierr); 1926 ierr = PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));CHKERRQ(ierr); 1927 ierr = PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));CHKERRQ(ierr); 1928 ierr = PetscViewerASCIIPrintf(viewer," INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));CHKERRQ(ierr); 1929 ierr = PetscViewerASCIIPrintf(viewer," INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(36));CHKERRQ(ierr); 1930 ierr = PetscViewerASCIIPrintf(viewer," INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d \n",mumps->id.INFOG(37));CHKERRQ(ierr); 1931 ierr = PetscViewerASCIIPrintf(viewer," INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(38));CHKERRQ(ierr); 1932 ierr = PetscViewerASCIIPrintf(viewer," INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d \n",mumps->id.INFOG(39));CHKERRQ(ierr); 1933 } 1934 } 1935 } 1936 PetscFunctionReturn(0); 1937 } 1938 1939 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info) 1940 { 1941 Mat_MUMPS *mumps =(Mat_MUMPS*)A->data; 1942 1943 PetscFunctionBegin; 1944 info->block_size = 1.0; 1945 info->nz_allocated = mumps->id.INFOG(20); 1946 info->nz_used = mumps->id.INFOG(20); 1947 info->nz_unneeded = 0.0; 1948 info->assemblies = 0.0; 1949 info->mallocs = 0.0; 1950 info->memory = 0.0; 1951 info->fill_ratio_given = 0; 1952 info->fill_ratio_needed = 0; 1953 info->factor_mallocs = 0; 1954 PetscFunctionReturn(0); 1955 } 1956 1957 /* -------------------------------------------------------------------------------------------*/ 1958 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 1959 { 1960 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1961 const PetscScalar *arr; 1962 const PetscInt *idxs; 1963 PetscInt size,i; 1964 PetscErrorCode ierr; 1965 1966 PetscFunctionBegin; 1967 ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr); 1968 if (mumps->petsc_size > 1) { 1969 PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */ 1970 1971 ls = mumps->myid ? (size ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 1972 ierr = MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);CHKERRQ(ierr); 1973 if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc\n"); 1974 } 1975 1976 /* Schur complement matrix */ 1977 ierr = MatDestroy(&F->schur);CHKERRQ(ierr); 1978 ierr = MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur);CHKERRQ(ierr); 1979 ierr = MatDenseGetArrayRead(F->schur,&arr);CHKERRQ(ierr); 1980 mumps->id.schur = (MumpsScalar*)arr; 1981 mumps->id.size_schur = size; 1982 mumps->id.schur_lld = size; 1983 ierr = MatDenseRestoreArrayRead(F->schur,&arr);CHKERRQ(ierr); 1984 if (mumps->sym == 1) { 1985 ierr = MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 1986 } 1987 1988 /* MUMPS expects Fortran style indices */ 1989 ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr); 1990 ierr = PetscMalloc1(size,&mumps->id.listvar_schur);CHKERRQ(ierr); 1991 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 1992 ierr = PetscArraycpy(mumps->id.listvar_schur,idxs,size);CHKERRQ(ierr); 1993 for (i=0;i<size;i++) mumps->id.listvar_schur[i]++; 1994 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 1995 if (mumps->petsc_size > 1) { 1996 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 1997 } else { 1998 if (F->factortype == MAT_FACTOR_LU) { 1999 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2000 } else { 2001 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2002 } 2003 } 2004 /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 2005 mumps->id.ICNTL(26) = -1; 2006 PetscFunctionReturn(0); 2007 } 2008 2009 /* -------------------------------------------------------------------------------------------*/ 2010 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S) 2011 { 2012 Mat St; 2013 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2014 PetscScalar *array; 2015 #if defined(PETSC_USE_COMPLEX) 2016 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 2017 #endif 2018 PetscErrorCode ierr; 2019 2020 PetscFunctionBegin; 2021 if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 2022 ierr = MatCreate(PETSC_COMM_SELF,&St);CHKERRQ(ierr); 2023 ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);CHKERRQ(ierr); 2024 ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr); 2025 ierr = MatSetUp(St);CHKERRQ(ierr); 2026 ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr); 2027 if (!mumps->sym) { /* MUMPS always return a full matrix */ 2028 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2029 PetscInt i,j,N=mumps->id.size_schur; 2030 for (i=0;i<N;i++) { 2031 for (j=0;j<N;j++) { 2032 #if !defined(PETSC_USE_COMPLEX) 2033 PetscScalar val = mumps->id.schur[i*N+j]; 2034 #else 2035 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2036 #endif 2037 array[j*N+i] = val; 2038 } 2039 } 2040 } else { /* stored by columns */ 2041 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 2042 } 2043 } else { /* either full or lower-triangular (not packed) */ 2044 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2045 PetscInt i,j,N=mumps->id.size_schur; 2046 for (i=0;i<N;i++) { 2047 for (j=i;j<N;j++) { 2048 #if !defined(PETSC_USE_COMPLEX) 2049 PetscScalar val = mumps->id.schur[i*N+j]; 2050 #else 2051 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2052 #endif 2053 array[i*N+j] = val; 2054 array[j*N+i] = val; 2055 } 2056 } 2057 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2058 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 2059 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2060 PetscInt i,j,N=mumps->id.size_schur; 2061 for (i=0;i<N;i++) { 2062 for (j=0;j<i+1;j++) { 2063 #if !defined(PETSC_USE_COMPLEX) 2064 PetscScalar val = mumps->id.schur[i*N+j]; 2065 #else 2066 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2067 #endif 2068 array[i*N+j] = val; 2069 array[j*N+i] = val; 2070 } 2071 } 2072 } 2073 } 2074 ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr); 2075 *S = St; 2076 PetscFunctionReturn(0); 2077 } 2078 2079 /* -------------------------------------------------------------------------------------------*/ 2080 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival) 2081 { 2082 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2083 2084 PetscFunctionBegin; 2085 mumps->id.ICNTL(icntl) = ival; 2086 PetscFunctionReturn(0); 2087 } 2088 2089 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival) 2090 { 2091 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2092 2093 PetscFunctionBegin; 2094 *ival = mumps->id.ICNTL(icntl); 2095 PetscFunctionReturn(0); 2096 } 2097 2098 /*@ 2099 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2100 2101 Logically Collective on Mat 2102 2103 Input Parameters: 2104 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2105 . icntl - index of MUMPS parameter array ICNTL() 2106 - ival - value of MUMPS ICNTL(icntl) 2107 2108 Options Database: 2109 . -mat_mumps_icntl_<icntl> <ival> 2110 2111 Level: beginner 2112 2113 References: 2114 . MUMPS Users' Guide 2115 2116 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2117 @*/ 2118 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival) 2119 { 2120 PetscErrorCode ierr; 2121 2122 PetscFunctionBegin; 2123 PetscValidType(F,1); 2124 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2125 PetscValidLogicalCollectiveInt(F,icntl,2); 2126 PetscValidLogicalCollectiveInt(F,ival,3); 2127 ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr); 2128 PetscFunctionReturn(0); 2129 } 2130 2131 /*@ 2132 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2133 2134 Logically Collective on Mat 2135 2136 Input Parameters: 2137 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2138 - icntl - index of MUMPS parameter array ICNTL() 2139 2140 Output Parameter: 2141 . ival - value of MUMPS ICNTL(icntl) 2142 2143 Level: beginner 2144 2145 References: 2146 . MUMPS Users' Guide 2147 2148 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2149 @*/ 2150 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival) 2151 { 2152 PetscErrorCode ierr; 2153 2154 PetscFunctionBegin; 2155 PetscValidType(F,1); 2156 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2157 PetscValidLogicalCollectiveInt(F,icntl,2); 2158 PetscValidIntPointer(ival,3); 2159 ierr = PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2160 PetscFunctionReturn(0); 2161 } 2162 2163 /* -------------------------------------------------------------------------------------------*/ 2164 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val) 2165 { 2166 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2167 2168 PetscFunctionBegin; 2169 mumps->id.CNTL(icntl) = val; 2170 PetscFunctionReturn(0); 2171 } 2172 2173 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val) 2174 { 2175 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2176 2177 PetscFunctionBegin; 2178 *val = mumps->id.CNTL(icntl); 2179 PetscFunctionReturn(0); 2180 } 2181 2182 /*@ 2183 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2184 2185 Logically Collective on Mat 2186 2187 Input Parameters: 2188 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2189 . icntl - index of MUMPS parameter array CNTL() 2190 - val - value of MUMPS CNTL(icntl) 2191 2192 Options Database: 2193 . -mat_mumps_cntl_<icntl> <val> 2194 2195 Level: beginner 2196 2197 References: 2198 . MUMPS Users' Guide 2199 2200 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2201 @*/ 2202 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val) 2203 { 2204 PetscErrorCode ierr; 2205 2206 PetscFunctionBegin; 2207 PetscValidType(F,1); 2208 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2209 PetscValidLogicalCollectiveInt(F,icntl,2); 2210 PetscValidLogicalCollectiveReal(F,val,3); 2211 ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr); 2212 PetscFunctionReturn(0); 2213 } 2214 2215 /*@ 2216 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2217 2218 Logically Collective on Mat 2219 2220 Input Parameters: 2221 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2222 - icntl - index of MUMPS parameter array CNTL() 2223 2224 Output Parameter: 2225 . val - value of MUMPS CNTL(icntl) 2226 2227 Level: beginner 2228 2229 References: 2230 . MUMPS Users' Guide 2231 2232 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2233 @*/ 2234 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val) 2235 { 2236 PetscErrorCode ierr; 2237 2238 PetscFunctionBegin; 2239 PetscValidType(F,1); 2240 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2241 PetscValidLogicalCollectiveInt(F,icntl,2); 2242 PetscValidRealPointer(val,3); 2243 ierr = PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2244 PetscFunctionReturn(0); 2245 } 2246 2247 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info) 2248 { 2249 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2250 2251 PetscFunctionBegin; 2252 *info = mumps->id.INFO(icntl); 2253 PetscFunctionReturn(0); 2254 } 2255 2256 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog) 2257 { 2258 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2259 2260 PetscFunctionBegin; 2261 *infog = mumps->id.INFOG(icntl); 2262 PetscFunctionReturn(0); 2263 } 2264 2265 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo) 2266 { 2267 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2268 2269 PetscFunctionBegin; 2270 *rinfo = mumps->id.RINFO(icntl); 2271 PetscFunctionReturn(0); 2272 } 2273 2274 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog) 2275 { 2276 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2277 2278 PetscFunctionBegin; 2279 *rinfog = mumps->id.RINFOG(icntl); 2280 PetscFunctionReturn(0); 2281 } 2282 2283 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS) 2284 { 2285 PetscErrorCode ierr; 2286 Mat Bt = NULL,Btseq = NULL; 2287 PetscBool flg; 2288 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2289 PetscScalar *aa; 2290 PetscInt spnr,*ia,*ja; 2291 2292 PetscFunctionBegin; 2293 PetscValidIntPointer(spRHS,2); 2294 ierr = PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);CHKERRQ(ierr); 2295 if (flg) { 2296 ierr = MatTransposeGetMat(spRHS,&Bt);CHKERRQ(ierr); 2297 } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be type MATTRANSPOSEMAT matrix"); 2298 2299 ierr = MatMumpsSetIcntl(F,30,1);CHKERRQ(ierr); 2300 2301 if (mumps->petsc_size > 1) { 2302 Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data; 2303 Btseq = b->A; 2304 } else { 2305 Btseq = Bt; 2306 } 2307 2308 if (!mumps->myid) { 2309 ierr = MatSeqAIJGetArray(Btseq,&aa);CHKERRQ(ierr); 2310 ierr = MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2311 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2312 2313 mumps->id.irhs_ptr = ia; 2314 mumps->id.irhs_sparse = ja; 2315 mumps->id.nz_rhs = ia[spnr] - 1; 2316 mumps->id.rhs_sparse = (MumpsScalar*)aa; 2317 } else { 2318 mumps->id.irhs_ptr = NULL; 2319 mumps->id.irhs_sparse = NULL; 2320 mumps->id.nz_rhs = 0; 2321 mumps->id.rhs_sparse = NULL; 2322 } 2323 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2324 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2325 2326 /* solve phase */ 2327 /*-------------*/ 2328 mumps->id.job = JOB_SOLVE; 2329 PetscMUMPS_c(mumps); 2330 if (mumps->id.INFOG(1) < 0) 2331 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 2332 2333 if (!mumps->myid) { 2334 ierr = MatSeqAIJRestoreArray(Btseq,&aa);CHKERRQ(ierr); 2335 ierr = MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2336 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2337 } 2338 PetscFunctionReturn(0); 2339 } 2340 2341 /*@ 2342 MatMumpsGetInverse - Get user-specified set of entries in inverse of A 2343 2344 Logically Collective on Mat 2345 2346 Input Parameters: 2347 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2348 - spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0] 2349 2350 Output Parameter: 2351 . spRHS - requested entries of inverse of A 2352 2353 Level: beginner 2354 2355 References: 2356 . MUMPS Users' Guide 2357 2358 .seealso: MatGetFactor(), MatCreateTranspose() 2359 @*/ 2360 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS) 2361 { 2362 PetscErrorCode ierr; 2363 2364 PetscFunctionBegin; 2365 PetscValidType(F,1); 2366 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2367 ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr); 2368 PetscFunctionReturn(0); 2369 } 2370 2371 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST) 2372 { 2373 PetscErrorCode ierr; 2374 Mat spRHS; 2375 2376 PetscFunctionBegin; 2377 ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr); 2378 ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr); 2379 ierr = MatDestroy(&spRHS);CHKERRQ(ierr); 2380 PetscFunctionReturn(0); 2381 } 2382 2383 /*@ 2384 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T 2385 2386 Logically Collective on Mat 2387 2388 Input Parameters: 2389 + F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface 2390 - spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0] 2391 2392 Output Parameter: 2393 . spRHST - requested entries of inverse of A^T 2394 2395 Level: beginner 2396 2397 References: 2398 . MUMPS Users' Guide 2399 2400 .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse() 2401 @*/ 2402 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST) 2403 { 2404 PetscErrorCode ierr; 2405 PetscBool flg; 2406 2407 PetscFunctionBegin; 2408 PetscValidType(F,1); 2409 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2410 ierr = PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 2411 if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix"); 2412 2413 ierr = PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));CHKERRQ(ierr); 2414 PetscFunctionReturn(0); 2415 } 2416 2417 /*@ 2418 MatMumpsGetInfo - Get MUMPS parameter INFO() 2419 2420 Logically Collective on Mat 2421 2422 Input Parameters: 2423 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2424 - icntl - index of MUMPS parameter array INFO() 2425 2426 Output Parameter: 2427 . ival - value of MUMPS INFO(icntl) 2428 2429 Level: beginner 2430 2431 References: 2432 . MUMPS Users' Guide 2433 2434 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2435 @*/ 2436 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival) 2437 { 2438 PetscErrorCode ierr; 2439 2440 PetscFunctionBegin; 2441 PetscValidType(F,1); 2442 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2443 PetscValidIntPointer(ival,3); 2444 ierr = PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2445 PetscFunctionReturn(0); 2446 } 2447 2448 /*@ 2449 MatMumpsGetInfog - Get MUMPS parameter INFOG() 2450 2451 Logically Collective on Mat 2452 2453 Input Parameters: 2454 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2455 - icntl - index of MUMPS parameter array INFOG() 2456 2457 Output Parameter: 2458 . ival - value of MUMPS INFOG(icntl) 2459 2460 Level: beginner 2461 2462 References: 2463 . MUMPS Users' Guide 2464 2465 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2466 @*/ 2467 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival) 2468 { 2469 PetscErrorCode ierr; 2470 2471 PetscFunctionBegin; 2472 PetscValidType(F,1); 2473 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2474 PetscValidIntPointer(ival,3); 2475 ierr = PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2476 PetscFunctionReturn(0); 2477 } 2478 2479 /*@ 2480 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 2481 2482 Logically Collective on Mat 2483 2484 Input Parameters: 2485 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2486 - icntl - index of MUMPS parameter array RINFO() 2487 2488 Output Parameter: 2489 . val - value of MUMPS RINFO(icntl) 2490 2491 Level: beginner 2492 2493 References: 2494 . MUMPS Users' Guide 2495 2496 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2497 @*/ 2498 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val) 2499 { 2500 PetscErrorCode ierr; 2501 2502 PetscFunctionBegin; 2503 PetscValidType(F,1); 2504 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2505 PetscValidRealPointer(val,3); 2506 ierr = PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2507 PetscFunctionReturn(0); 2508 } 2509 2510 /*@ 2511 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 2512 2513 Logically Collective on Mat 2514 2515 Input Parameters: 2516 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2517 - icntl - index of MUMPS parameter array RINFOG() 2518 2519 Output Parameter: 2520 . val - value of MUMPS RINFOG(icntl) 2521 2522 Level: beginner 2523 2524 References: 2525 . MUMPS Users' Guide 2526 2527 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2528 @*/ 2529 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val) 2530 { 2531 PetscErrorCode ierr; 2532 2533 PetscFunctionBegin; 2534 PetscValidType(F,1); 2535 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2536 PetscValidRealPointer(val,3); 2537 ierr = PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2538 PetscFunctionReturn(0); 2539 } 2540 2541 /*MC 2542 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 2543 distributed and sequential matrices via the external package MUMPS. 2544 2545 Works with MATAIJ and MATSBAIJ matrices 2546 2547 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 2548 2549 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below. 2550 2551 Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver 2552 2553 Options Database Keys: 2554 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 2555 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 2556 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 2557 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 2558 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 2559 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis 2560 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 2561 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 2562 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 2563 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 2564 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 2565 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 2566 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 2567 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 2568 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can allocate per processor 2569 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 2570 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 2571 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix 2572 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering 2573 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 2574 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 2575 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 2576 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 2577 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 2578 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 2579 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 2580 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 2581 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 2582 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 2583 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 2584 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 2585 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 2586 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 2587 Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 2588 2589 Level: beginner 2590 2591 Notes: 2592 MUMPS Cholesky does not handle (complex) Hermitian matrices http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf so using it will error if the matrix is Hermitian. 2593 2594 When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling 2595 $ KSPGetPC(ksp,&pc); 2596 $ PCFactorGetMatrix(pc,&mat); 2597 $ MatMumpsGetInfo(mat,....); 2598 $ MatMumpsGetInfog(mat,....); etc. 2599 Or you can run with -ksp_error_if_not_converged and the program will be stopped and the information printed in the error message. 2600 2601 Two modes to run MUMPS/PETSc with OpenMP 2602 2603 $ Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 2604 $ threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". 2605 2606 $ -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example, 2607 $ if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 2608 2609 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 2610 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc 2611 (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 2612 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 2613 (PETSc will automatically try to utilized a threaded BLAS if --with-openmp is provided). 2614 2615 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 2616 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 2617 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 2618 are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set 2619 by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs. 2620 In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets, 2621 if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind 2622 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half 2623 cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 2624 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 2625 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 2626 examine the mapping result. 2627 2628 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts, 2629 for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc 2630 calls omp_set_num_threads(m) internally before calling MUMPS. 2631 2632 References: 2633 + 1. - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 2634 - 2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017. 2635 2636 .seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCGetFactor(), PCFactorGetMatrix() 2637 2638 M*/ 2639 2640 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type) 2641 { 2642 PetscFunctionBegin; 2643 *type = MATSOLVERMUMPS; 2644 PetscFunctionReturn(0); 2645 } 2646 2647 /* MatGetFactor for Seq and MPI AIJ matrices */ 2648 static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F) 2649 { 2650 Mat B; 2651 PetscErrorCode ierr; 2652 Mat_MUMPS *mumps; 2653 PetscBool isSeqAIJ; 2654 2655 PetscFunctionBegin; 2656 /* Create the factorization matrix */ 2657 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr); 2658 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2659 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2660 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2661 ierr = MatSetUp(B);CHKERRQ(ierr); 2662 2663 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2664 2665 B->ops->view = MatView_MUMPS; 2666 B->ops->getinfo = MatGetInfo_MUMPS; 2667 2668 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2669 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2670 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2671 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2672 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2673 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2674 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2675 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2676 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2677 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2678 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2679 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2680 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2681 2682 if (ftype == MAT_FACTOR_LU) { 2683 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 2684 B->factortype = MAT_FACTOR_LU; 2685 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 2686 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 2687 mumps->sym = 0; 2688 } else { 2689 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 2690 B->factortype = MAT_FACTOR_CHOLESKY; 2691 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 2692 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 2693 #if defined(PETSC_USE_COMPLEX) 2694 mumps->sym = 2; 2695 #else 2696 if (A->spd_set && A->spd) mumps->sym = 1; 2697 else mumps->sym = 2; 2698 #endif 2699 } 2700 2701 /* set solvertype */ 2702 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2703 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2704 2705 B->ops->destroy = MatDestroy_MUMPS; 2706 B->data = (void*)mumps; 2707 2708 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2709 2710 *F = B; 2711 PetscFunctionReturn(0); 2712 } 2713 2714 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 2715 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F) 2716 { 2717 Mat B; 2718 PetscErrorCode ierr; 2719 Mat_MUMPS *mumps; 2720 PetscBool isSeqSBAIJ; 2721 2722 PetscFunctionBegin; 2723 if (ftype != MAT_FACTOR_CHOLESKY) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Cannot use PETSc SBAIJ matrices with MUMPS LU, use AIJ matrix"); 2724 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr); 2725 /* Create the factorization matrix */ 2726 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2727 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2728 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2729 ierr = MatSetUp(B);CHKERRQ(ierr); 2730 2731 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2732 if (isSeqSBAIJ) { 2733 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 2734 } else { 2735 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 2736 } 2737 2738 B->ops->getinfo = MatGetInfo_External; 2739 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 2740 B->ops->view = MatView_MUMPS; 2741 2742 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2743 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2744 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2745 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2746 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2747 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2748 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2749 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2750 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2751 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2752 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2753 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2754 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2755 2756 B->factortype = MAT_FACTOR_CHOLESKY; 2757 #if defined(PETSC_USE_COMPLEX) 2758 mumps->sym = 2; 2759 #else 2760 if (A->spd_set && A->spd) mumps->sym = 1; 2761 else mumps->sym = 2; 2762 #endif 2763 2764 /* set solvertype */ 2765 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2766 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2767 2768 B->ops->destroy = MatDestroy_MUMPS; 2769 B->data = (void*)mumps; 2770 2771 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2772 2773 *F = B; 2774 PetscFunctionReturn(0); 2775 } 2776 2777 static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F) 2778 { 2779 Mat B; 2780 PetscErrorCode ierr; 2781 Mat_MUMPS *mumps; 2782 PetscBool isSeqBAIJ; 2783 2784 PetscFunctionBegin; 2785 /* Create the factorization matrix */ 2786 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr); 2787 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2788 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2789 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2790 ierr = MatSetUp(B);CHKERRQ(ierr); 2791 2792 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2793 if (ftype == MAT_FACTOR_LU) { 2794 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 2795 B->factortype = MAT_FACTOR_LU; 2796 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 2797 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 2798 mumps->sym = 0; 2799 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead\n"); 2800 2801 B->ops->getinfo = MatGetInfo_External; 2802 B->ops->view = MatView_MUMPS; 2803 2804 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2805 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2806 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2807 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2808 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2809 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2810 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2811 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2812 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2813 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2814 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2815 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2816 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2817 2818 /* set solvertype */ 2819 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2820 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2821 2822 B->ops->destroy = MatDestroy_MUMPS; 2823 B->data = (void*)mumps; 2824 2825 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2826 2827 *F = B; 2828 PetscFunctionReturn(0); 2829 } 2830 2831 /* MatGetFactor for Seq and MPI SELL matrices */ 2832 static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F) 2833 { 2834 Mat B; 2835 PetscErrorCode ierr; 2836 Mat_MUMPS *mumps; 2837 PetscBool isSeqSELL; 2838 2839 PetscFunctionBegin; 2840 /* Create the factorization matrix */ 2841 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);CHKERRQ(ierr); 2842 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2843 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2844 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2845 ierr = MatSetUp(B);CHKERRQ(ierr); 2846 2847 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2848 2849 B->ops->view = MatView_MUMPS; 2850 B->ops->getinfo = MatGetInfo_MUMPS; 2851 2852 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2853 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2854 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2855 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2856 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2857 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2858 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2859 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2860 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2861 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2862 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2863 2864 if (ftype == MAT_FACTOR_LU) { 2865 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 2866 B->factortype = MAT_FACTOR_LU; 2867 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 2868 else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 2869 mumps->sym = 0; 2870 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 2871 2872 /* set solvertype */ 2873 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2874 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2875 2876 B->ops->destroy = MatDestroy_MUMPS; 2877 B->data = (void*)mumps; 2878 2879 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2880 2881 *F = B; 2882 PetscFunctionReturn(0); 2883 } 2884 2885 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 2886 { 2887 PetscErrorCode ierr; 2888 2889 PetscFunctionBegin; 2890 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2891 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2892 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2893 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2894 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 2895 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2896 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2897 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2898 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2899 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 2900 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);CHKERRQ(ierr); 2901 PetscFunctionReturn(0); 2902 } 2903 2904