/*
    Provides an interface to the MUMPS sparse solver
*/

#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sell/mpi/mpisell.h>

/* MUMPS headers are precision/scalar-type specific; pick the one matching the PETSc build */
EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#include <cmumps_c.h>
#else
#include <zmumps_c.h>
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#include <smumps_c.h>
#else
#include <dmumps_c.h>
#endif
#endif
EXTERN_C_END

/* MUMPS job codes passed in id.job (see the MUMPS users' guide) */
#define JOB_INIT -1
#define JOB_FACTSYMBOLIC 1
#define JOB_FACTNUMERIC 2
#define JOB_SOLVE 3
#define JOB_END -2

/* calls to MUMPS */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c cmumps_c
#else
#define MUMPS_c zmumps_c
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c smumps_c
#else
#define MUMPS_c dmumps_c
#endif
#endif

/* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
#define PetscMUMPS_c(mumps) \
  do { \
    if (mumps->use_petsc_omp_support) { \
      if (mumps->is_omp_master) { \
        ierr = PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl);CHKERRQ(ierr); \
        MUMPS_c(&mumps->id); \
        ierr = PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl);CHKERRQ(ierr); \
      } \
      ierr = PetscOmpCtrlBarrier(mumps->omp_ctrl);CHKERRQ(ierr); \
      /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \
         to processes, so we only Bcast info[1], an error code and leave others (since they do not have \
         an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \
         omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \
      */ \
      ierr = MPI_Bcast(mumps->id.infog, 40,MPI_INT, 0,mumps->omp_comm);CHKERRQ(ierr); \
      ierr = MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); \
      ierr = MPI_Bcast(mumps->id.info, 1, MPI_INT, 0,mumps->omp_comm);CHKERRQ(ierr); \
    } else { \
      MUMPS_c(&mumps->id); \
    } \
  } while(0)
#else
#define PetscMUMPS_c(mumps) \
  do { MUMPS_c(&mumps->id); } while (0)
#endif

/* declare MumpsScalar */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MumpsScalar mumps_complex
#else
#define MumpsScalar mumps_double_complex
#endif
#else
#define MumpsScalar PetscScalar
#endif

/* macros s.t. indices match MUMPS documentation (MUMPS arrays are 1-based in its manual) */
#define ICNTL(I) icntl[(I)-1]
#define CNTL(I) cntl[(I)-1]
#define INFOG(I) infog[(I)-1]
#define INFO(I) info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I) rinfo[(I)-1]

/* Private data for a MUMPS factorization; stored in the factor Mat's data pointer */
typedef struct {
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;
#else
  ZMUMPS_STRUC_C id;
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
#else
  DMUMPS_STRUC_C id;
#endif
#endif

  MatStructure   matstruc;
  PetscMPIInt    myid,petsc_size;
  PetscInt       *irn,*jcn,nz,sym;    /* matrix in COO (triple) form as MUMPS wants it */
  PetscScalar    *val;
  MPI_Comm       mumps_comm;
  PetscInt       ICNTL9_pre;          /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter     scat_rhs, scat_sol;  /* used by MatSolve() */
  Vec            b_seq,x_seq;
  PetscInt       ninfo,*info;         /* display INFO */
  PetscInt       sizeredrhs;
  PetscScalar    *schur_sol;
  PetscInt       schur_sizesol;

  PetscBool      use_petsc_omp_support;
  PetscOmpCtrl   omp_ctrl;            /* an OpenMP controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */
  MPI_Comm       petsc_comm,omp_comm; /* petsc_comm is petsc matrix's comm */
  PetscMPIInt    mpinz;               /* on master rank, nz = sum(mpinz) over omp_comm; on other ranks, mpinz = nz */
  PetscMPIInt    omp_comm_size;
  PetscBool      is_omp_master;       /* is this rank the master of omp_comm */
  PetscMPIInt    *recvcount,*displs;

  /* format-specific routine filling irn/jcn/val from the PETSc matrix */
  PetscErrorCode (*ConvertToTriples)(Mat, int, MatReuse, int*, int**, int**, PetscScalar**);
} Mat_MUMPS;

extern PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*);

/* Free all Schur-complement related storage and reset the MUMPS Schur settings */
static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr);
  ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
  ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
  mumps->id.size_schur = 0;
  mumps->id.schur_lld  = 0;
  mumps->id.ICNTL(19)  = 0;
  PetscFunctionReturn(0);
}

/* solve with rhs in mumps->id.redrhs and return in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS            *mumps=(Mat_MUMPS*)F->data;
  Mat                  S,B,X;
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;
  PetscErrorCode       ierr;

  PetscFunctionBegin;
  ierr = MatFactorFactorizeSchurComplement(F);CHKERRQ(ierr);
  ierr = MatFactorGetSchurComplement(F,&S,&schurstatus);CHKERRQ(ierr);
  /* wrap the reduced right-hand sides (id.redrhs) as a dense matrix B */
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  ierr = MatPinToCPU(B,S->pinnedtocpu);CHKERRQ(ierr);
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    /* S is factored: solve in place, X aliases the same redrhs storage as B */
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);CHKERRQ(ierr);
    ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    ierr = MatPinToCPU(X,S->pinnedtocpu);CHKERRQ(ierr);
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatMatSolveTranspose(S,B,X);CHKERRQ(ierr);
    } else {
      ierr = MatMatSolve(S,B,X);CHKERRQ(ierr);
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    /* S holds inv(S): multiply into a scratch buffer, then copy back into redrhs */
    sizesol = mumps->id.nrhs*mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
      ierr = PetscMalloc1(sizesol,&mumps->schur_sol);CHKERRQ(ierr);
      mumps->schur_sizesol = sizesol;
    }
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);CHKERRQ(ierr);
    ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    ierr = MatPinToCPU(X,S->pinnedtocpu);CHKERRQ(ierr);
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatTransposeMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr);
    } else {
      ierr = MatMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr);
    }
    ierr = MatCopy(X,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %D",F->schur_status);
    break;
  }
  ierr = MatFactorRestoreSchurComplement(F,&S,schurstatus);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = MatDestroy(&X);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Prepare (expansion=PETSC_FALSE) or complete (expansion=PETSC_TRUE) a solve with a Schur complement:
   condensation sets ICNTL(26)=1 and allocates id.redrhs; expansion solves the Schur system and runs the
   MUMPS expansion phase (ICNTL(26)=2) before restoring ICNTL(26)=-1 */
static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)F->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
    PetscFunctionReturn(0);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = mumps->id.size_schur;
      ierr = PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);CHKERRQ(ierr);
      mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs;
    }
    mumps->id.ICNTL(26) = 1; /* condensation phase */
  } else { /* prepare for the expansion step */
    /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
    ierr = MatMumpsSolveSchur_Private(F);CHKERRQ(ierr);
    mumps->id.ICNTL(26) = 2; /* expansion phase */
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
    if (mumps->id.nrhs > 1) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = 0;
      mumps->sizeredrhs = 0;
    }
  }
  PetscFunctionReturn(0);
}

/*
  MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz]

  input:
    A       - matrix in aij,baij or sbaij format
    shift   - 0: C style output triple; 1: Fortran style output triple.
    reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
              MAT_REUSE_MATRIX:  only the values in v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and col index, matrix values (matrix triples)

  The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
  freed with PetscFree(mumps->irn);  This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means
  that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
259 260 */ 261 262 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 263 { 264 const PetscScalar *av; 265 const PetscInt *ai,*aj,*ajj,M=A->rmap->n; 266 PetscInt nz,rnz,i,j; 267 PetscErrorCode ierr; 268 PetscInt *row,*col; 269 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 270 271 PetscFunctionBegin; 272 ierr = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr); 273 *v = (PetscScalar*)av; 274 if (reuse == MAT_INITIAL_MATRIX) { 275 nz = aa->nz; 276 ai = aa->i; 277 aj = aa->j; 278 *nnz = nz; 279 ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr); 280 col = row + nz; 281 282 nz = 0; 283 for (i=0; i<M; i++) { 284 rnz = ai[i+1] - ai[i]; 285 ajj = aj + ai[i]; 286 for (j=0; j<rnz; j++) { 287 row[nz] = i+shift; col[nz++] = ajj[j] + shift; 288 } 289 } 290 *r = row; *c = col; 291 } 292 ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr); 293 PetscFunctionReturn(0); 294 } 295 296 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 297 { 298 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 299 PetscInt *ptr; 300 301 PetscFunctionBegin; 302 *v = a->val; 303 if (reuse == MAT_INITIAL_MATRIX) { 304 PetscInt nz,i,j,row; 305 PetscErrorCode ierr; 306 307 nz = a->sliidx[a->totalslices]; 308 *nnz = nz; 309 ierr = PetscMalloc1(2*nz, &ptr);CHKERRQ(ierr); 310 *r = ptr; 311 *c = ptr + nz; 312 313 for (i=0; i<a->totalslices; i++) { 314 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 315 *ptr++ = 8*i + row + shift; 316 } 317 } 318 for (i=0;i<nz;i++) *ptr++ = a->colidx[i] + shift; 319 } 320 PetscFunctionReturn(0); 321 } 322 323 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 324 { 325 Mat_SeqBAIJ *aa=(Mat_SeqBAIJ*)A->data; 326 const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2; 327 PetscInt bs,M,nz,idx=0,rnz,i,j,k,m; 328 PetscErrorCode ierr; 329 PetscInt *row,*col; 330 331 
PetscFunctionBegin; 332 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 333 M = A->rmap->N/bs; 334 *v = aa->a; 335 if (reuse == MAT_INITIAL_MATRIX) { 336 ai = aa->i; aj = aa->j; 337 nz = bs2*aa->nz; 338 *nnz = nz; 339 ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr); 340 col = row + nz; 341 342 for (i=0; i<M; i++) { 343 ajj = aj + ai[i]; 344 rnz = ai[i+1] - ai[i]; 345 for (k=0; k<rnz; k++) { 346 for (j=0; j<bs; j++) { 347 for (m=0; m<bs; m++) { 348 row[idx] = i*bs + m + shift; 349 col[idx++] = bs*(ajj[k]) + j + shift; 350 } 351 } 352 } 353 } 354 *r = row; *c = col; 355 } 356 PetscFunctionReturn(0); 357 } 358 359 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r,int **c,PetscScalar **v) 360 { 361 const PetscInt *ai, *aj,*ajj; 362 PetscInt nz,rnz,i,j,k,m,bs; 363 PetscErrorCode ierr; 364 PetscInt *row,*col; 365 PetscScalar *val; 366 Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)A->data; 367 const PetscInt bs2=aa->bs2,mbs=aa->mbs; 368 #if defined(PETSC_USE_COMPLEX) 369 PetscBool hermitian; 370 #endif 371 372 PetscFunctionBegin; 373 #if defined(PETSC_USE_COMPLEX) 374 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 375 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 376 #endif 377 ai = aa->i; 378 aj = aa->j; 379 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 380 if (reuse == MAT_INITIAL_MATRIX) { 381 nz = aa->nz; 382 ierr = PetscMalloc((2*bs2*nz*sizeof(PetscInt)+(bs>1?bs2*nz*sizeof(PetscScalar):0)), &row);CHKERRQ(ierr); 383 col = row + bs2*nz; 384 if (bs>1) val = (PetscScalar*)(col + bs2*nz); 385 else val = aa->a; 386 387 *r = row; *c = col; *v = val; 388 } else { 389 if (bs == 1) *v = aa->a; 390 row = *r; col = *c; val = *v; 391 } 392 393 nz = 0; 394 if (bs>1) { 395 for (i=0; i<mbs; i++) { 396 rnz = ai[i+1] - ai[i]; 397 ajj = aj + ai[i]; 398 for (j=0; j<rnz; j++) { 399 for (k=0; k<bs; k++) { 400 for (m=0; m<bs; m++) { 401 if (ajj[j]>i || 
k>=m) { 402 if (reuse == MAT_INITIAL_MATRIX) { 403 row[nz] = i*bs + m + shift; 404 col[nz] = ajj[j]*bs + k + shift; 405 } 406 val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs]; 407 } 408 } 409 } 410 } 411 } 412 } else if (reuse == MAT_INITIAL_MATRIX) { 413 for (i=0; i<mbs; i++) { 414 rnz = ai[i+1] - ai[i]; 415 ajj = aj + ai[i]; 416 for (j=0; j<rnz; j++) { 417 row[nz] = i+shift; col[nz++] = ajj[j] + shift; 418 } 419 } 420 if (nz != aa->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %D != %D",nz,aa->nz); 421 } 422 if (reuse == MAT_INITIAL_MATRIX) *nnz = nz; 423 PetscFunctionReturn(0); 424 } 425 426 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 427 { 428 const PetscInt *ai,*aj,*ajj,*adiag,M=A->rmap->n; 429 PetscInt nz,rnz,i,j; 430 const PetscScalar *av,*v1; 431 PetscScalar *val; 432 PetscErrorCode ierr; 433 PetscInt *row,*col; 434 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 435 PetscBool missing; 436 #if defined(PETSC_USE_COMPLEX) 437 PetscBool hermitian; 438 #endif 439 440 PetscFunctionBegin; 441 #if defined(PETSC_USE_COMPLEX) 442 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 443 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 444 #endif 445 ierr = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr); 446 ai = aa->i; aj = aa->j; 447 adiag = aa->diag; 448 ierr = MatMissingDiagonal_SeqAIJ(A,&missing,&i);CHKERRQ(ierr); 449 if (reuse == MAT_INITIAL_MATRIX) { 450 /* count nz in the upper triangular part of A */ 451 nz = 0; 452 if (missing) { 453 for (i=0; i<M; i++) { 454 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 455 for (j=ai[i];j<ai[i+1];j++) { 456 if (aj[j] < i) continue; 457 nz++; 458 } 459 } else { 460 nz += ai[i+1] - adiag[i]; 461 } 462 } 463 } else { 464 for (i=0; i<M; i++) nz += ai[i+1] - adiag[i]; 465 } 466 *nnz = nz; 467 468 ierr = 
PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 469 col = row + nz; 470 val = (PetscScalar*)(col + nz); 471 472 nz = 0; 473 if (missing) { 474 for (i=0; i<M; i++) { 475 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 476 for (j=ai[i];j<ai[i+1];j++) { 477 if (aj[j] < i) continue; 478 row[nz] = i+shift; 479 col[nz] = aj[j]+shift; 480 val[nz] = av[j]; 481 nz++; 482 } 483 } else { 484 rnz = ai[i+1] - adiag[i]; 485 ajj = aj + adiag[i]; 486 v1 = av + adiag[i]; 487 for (j=0; j<rnz; j++) { 488 row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j]; 489 } 490 } 491 } 492 } else { 493 for (i=0; i<M; i++) { 494 rnz = ai[i+1] - adiag[i]; 495 ajj = aj + adiag[i]; 496 v1 = av + adiag[i]; 497 for (j=0; j<rnz; j++) { 498 row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j]; 499 } 500 } 501 } 502 *r = row; *c = col; *v = val; 503 } else { 504 nz = 0; val = *v; 505 if (missing) { 506 for (i=0; i <M; i++) { 507 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 508 for (j=ai[i];j<ai[i+1];j++) { 509 if (aj[j] < i) continue; 510 val[nz++] = av[j]; 511 } 512 } else { 513 rnz = ai[i+1] - adiag[i]; 514 v1 = av + adiag[i]; 515 for (j=0; j<rnz; j++) { 516 val[nz++] = v1[j]; 517 } 518 } 519 } 520 } else { 521 for (i=0; i <M; i++) { 522 rnz = ai[i+1] - adiag[i]; 523 v1 = av + adiag[i]; 524 for (j=0; j<rnz; j++) { 525 val[nz++] = v1[j]; 526 } 527 } 528 } 529 } 530 ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr); 531 PetscFunctionReturn(0); 532 } 533 534 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r,int **c, PetscScalar **v) 535 { 536 const PetscInt *ai,*aj,*bi,*bj,*garray,*ajj,*bjj; 537 PetscErrorCode ierr; 538 PetscInt rstart,nz,bs,i,j,k,m,jj,irow,countA,countB; 539 PetscInt *row,*col; 540 const PetscScalar *av,*bv,*v1,*v2; 541 PetscScalar *val; 542 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data; 543 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)(mat->A)->data; 544 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 
545 const PetscInt bs2=aa->bs2,mbs=aa->mbs; 546 #if defined(PETSC_USE_COMPLEX) 547 PetscBool hermitian; 548 #endif 549 550 PetscFunctionBegin; 551 #if defined(PETSC_USE_COMPLEX) 552 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 553 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 554 #endif 555 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 556 rstart = A->rmap->rstart; 557 ai = aa->i; 558 aj = aa->j; 559 bi = bb->i; 560 bj = bb->j; 561 av = aa->a; 562 bv = bb->a; 563 564 garray = mat->garray; 565 566 if (reuse == MAT_INITIAL_MATRIX) { 567 nz = aa->nz + bb->nz; 568 ierr = PetscMalloc((2*bs2*nz*sizeof(PetscInt)+bs2*nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 569 col = row + bs2*nz; 570 val = (PetscScalar*)(col + bs2*nz); 571 572 *r = row; *c = col; *v = val; 573 } else { 574 row = *r; col = *c; val = *v; 575 } 576 577 jj = 0; irow = rstart; 578 for (i=0; i<mbs; i++) { 579 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 580 countA = ai[i+1] - ai[i]; 581 countB = bi[i+1] - bi[i]; 582 bjj = bj + bi[i]; 583 v1 = av + ai[i]*bs2; 584 v2 = bv + bi[i]*bs2; 585 586 if (bs>1) { 587 /* A-part */ 588 for (j=0; j<countA; j++) { 589 for (k=0; k<bs; k++) { 590 for (m=0; m<bs; m++) { 591 if (rstart + ajj[j]*bs>irow || k>=m) { 592 if (reuse == MAT_INITIAL_MATRIX) { 593 row[jj] = irow + m + shift; col[jj] = rstart + ajj[j]*bs + k + shift; 594 } 595 val[jj++] = v1[j*bs2 + m + k*bs]; 596 } 597 } 598 } 599 } 600 601 /* B-part */ 602 for (j=0; j < countB; j++) { 603 for (k=0; k<bs; k++) { 604 for (m=0; m<bs; m++) { 605 if (reuse == MAT_INITIAL_MATRIX) { 606 row[jj] = irow + m + shift; col[jj] = garray[bjj[j]]*bs + k + shift; 607 } 608 val[jj++] = v2[j*bs2 + m + k*bs]; 609 } 610 } 611 } 612 } else { 613 /* A-part */ 614 for (j=0; j<countA; j++) { 615 if (reuse == MAT_INITIAL_MATRIX) { 616 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 617 } 618 val[jj++] = v1[j]; 
619 } 620 621 /* B-part */ 622 for (j=0; j < countB; j++) { 623 if (reuse == MAT_INITIAL_MATRIX) { 624 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 625 } 626 val[jj++] = v2[j]; 627 } 628 } 629 irow+=bs; 630 } 631 *nnz = jj; 632 PetscFunctionReturn(0); 633 } 634 635 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 636 { 637 const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 638 PetscErrorCode ierr; 639 PetscInt rstart,nz,i,j,jj,irow,countA,countB; 640 PetscInt *row,*col; 641 const PetscScalar *av, *bv,*v1,*v2; 642 PetscScalar *val; 643 Mat Ad,Ao; 644 Mat_SeqAIJ *aa; 645 Mat_SeqAIJ *bb; 646 647 PetscFunctionBegin; 648 ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr); 649 ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr); 650 ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr); 651 652 aa = (Mat_SeqAIJ*)(Ad)->data; 653 bb = (Mat_SeqAIJ*)(Ao)->data; 654 ai = aa->i; 655 aj = aa->j; 656 bi = bb->i; 657 bj = bb->j; 658 659 rstart = A->rmap->rstart; 660 661 if (reuse == MAT_INITIAL_MATRIX) { 662 nz = aa->nz + bb->nz; 663 *nnz = nz; 664 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 665 col = row + nz; 666 val = (PetscScalar*)(col + nz); 667 668 *r = row; *c = col; *v = val; 669 } else { 670 row = *r; col = *c; val = *v; 671 } 672 673 jj = 0; irow = rstart; 674 for (i=0; i<m; i++) { 675 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 676 countA = ai[i+1] - ai[i]; 677 countB = bi[i+1] - bi[i]; 678 bjj = bj + bi[i]; 679 v1 = av + ai[i]; 680 v2 = bv + bi[i]; 681 682 /* A-part */ 683 for (j=0; j<countA; j++) { 684 if (reuse == MAT_INITIAL_MATRIX) { 685 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 686 } 687 val[jj++] = v1[j]; 688 } 689 690 /* B-part */ 691 for (j=0; j < countB; j++) { 692 if (reuse == MAT_INITIAL_MATRIX) { 693 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 694 } 695 val[jj++] = 
v2[j]; 696 } 697 irow++; 698 } 699 ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr); 700 ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr); 701 PetscFunctionReturn(0); 702 } 703 704 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 705 { 706 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)A->data; 707 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ*)(mat->A)->data; 708 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 709 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj; 710 const PetscInt *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart; 711 const PetscInt bs2=mat->bs2; 712 PetscErrorCode ierr; 713 PetscInt bs,nz,i,j,k,n,jj,irow,countA,countB,idx; 714 PetscInt *row,*col; 715 const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2; 716 PetscScalar *val; 717 718 PetscFunctionBegin; 719 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 720 if (reuse == MAT_INITIAL_MATRIX) { 721 nz = bs2*(aa->nz + bb->nz); 722 *nnz = nz; 723 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 724 col = row + nz; 725 val = (PetscScalar*)(col + nz); 726 727 *r = row; *c = col; *v = val; 728 } else { 729 row = *r; col = *c; val = *v; 730 } 731 732 jj = 0; irow = rstart; 733 for (i=0; i<mbs; i++) { 734 countA = ai[i+1] - ai[i]; 735 countB = bi[i+1] - bi[i]; 736 ajj = aj + ai[i]; 737 bjj = bj + bi[i]; 738 v1 = av + bs2*ai[i]; 739 v2 = bv + bs2*bi[i]; 740 741 idx = 0; 742 /* A-part */ 743 for (k=0; k<countA; k++) { 744 for (j=0; j<bs; j++) { 745 for (n=0; n<bs; n++) { 746 if (reuse == MAT_INITIAL_MATRIX) { 747 row[jj] = irow + n + shift; 748 col[jj] = rstart + bs*ajj[k] + j + shift; 749 } 750 val[jj++] = v1[idx++]; 751 } 752 } 753 } 754 755 idx = 0; 756 /* B-part */ 757 for (k=0; k<countB; k++) { 758 for (j=0; j<bs; j++) { 759 for (n=0; n<bs; n++) { 760 if (reuse == MAT_INITIAL_MATRIX) { 761 row[jj] = irow + n + shift; 762 col[jj] = bs*garray[bjj[k]] + j + shift; 763 } 764 
val[jj++] = v2[idx++]; 765 } 766 } 767 } 768 irow += bs; 769 } 770 PetscFunctionReturn(0); 771 } 772 773 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 774 { 775 const PetscInt *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 776 PetscErrorCode ierr; 777 PetscInt rstart,nz,nza,nzb,i,j,jj,irow,countA,countB; 778 PetscInt *row,*col; 779 const PetscScalar *av, *bv,*v1,*v2; 780 PetscScalar *val; 781 Mat Ad,Ao; 782 Mat_SeqAIJ *aa; 783 Mat_SeqAIJ *bb; 784 #if defined(PETSC_USE_COMPLEX) 785 PetscBool hermitian; 786 #endif 787 788 PetscFunctionBegin; 789 #if defined(PETSC_USE_COMPLEX) 790 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 791 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 792 #endif 793 ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr); 794 ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr); 795 ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr); 796 797 aa = (Mat_SeqAIJ*)(Ad)->data; 798 bb = (Mat_SeqAIJ*)(Ao)->data; 799 ai = aa->i; 800 aj = aa->j; 801 adiag = aa->diag; 802 bi = bb->i; 803 bj = bb->j; 804 805 rstart = A->rmap->rstart; 806 807 if (reuse == MAT_INITIAL_MATRIX) { 808 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 809 nzb = 0; /* num of upper triangular entries in mat->B */ 810 for (i=0; i<m; i++) { 811 nza += (ai[i+1] - adiag[i]); 812 countB = bi[i+1] - bi[i]; 813 bjj = bj + bi[i]; 814 for (j=0; j<countB; j++) { 815 if (garray[bjj[j]] > rstart) nzb++; 816 } 817 } 818 819 nz = nza + nzb; /* total nz of upper triangular part of mat */ 820 *nnz = nz; 821 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 822 col = row + nz; 823 val = (PetscScalar*)(col + nz); 824 825 *r = row; *c = col; *v = val; 826 } else { 827 row = *r; col = *c; val = *v; 828 } 829 830 jj = 0; irow = rstart; 831 
for (i=0; i<m; i++) {
    ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
    v1  = av + adiag[i];
    countA = ai[i+1] - adiag[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
      }
      val[jj++] = v1[j];
    }

    /* B-part: only columns to the right of the local diagonal block */
    for (j=0; j < countB; j++) {
      if (garray[bjj[j]] > rstart) {
        if (reuse == MAT_INITIAL_MATRIX) {
          row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
        }
        val[jj++] = v2[j];
      }
    }
    irow++;
  }
  ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Destroy a MUMPS factor Mat: free all PETSc-side storage, terminate the MUMPS instance (JOB_END),
   and clear the composed query/set functions */
PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
  ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr);
  ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr);
  ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr);
  ierr = PetscFree(mumps->irn);CHKERRQ(ierr); /* jcn (and sometimes val) share this allocation */
  ierr = PetscFree(mumps->info);CHKERRQ(ierr);
  ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr);
  mumps->id.job = JOB_END;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1));
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  if (mumps->use_petsc_omp_support) { ierr = PetscOmpCtrlDestroy(&mumps->omp_ctrl);CHKERRQ(ierr); }
#endif
  ierr = PetscFree2(mumps->recvcount,mumps->displs);CHKERRQ(ierr);
  ierr = PetscFree(A->data);CHKERRQ(ierr);

  /* clear composed functions */
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Solve A x = b with the MUMPS factors; handles the centralized rhs MUMPS requires and the
   optional Schur condensation/expansion steps */
PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS        *mumps=(Mat_MUMPS*)A->data;
  PetscScalar      *array;
  Vec              b_seq;
  IS               is_iden,is_petsc;
  PetscErrorCode   ierr;
  PetscInt         i;
  PetscBool        second_solve = PETSC_FALSE;
  static PetscBool cite1 = PETSC_FALSE,cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1);CHKERRQ(ierr);
  ierr = PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",&cite2);CHKERRQ(ierr);

  if (A->factorerrortype) {
    ierr = PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    ierr = VecSetInf(x);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  mumps->id.ICNTL(20) = 0; /* dense RHS */
  mumps->id.nrhs      = 1;
  b_seq = mumps->b_seq;
  if (mumps->petsc_size > 1) {
    /* MUMPS only supports centralized rhs. Scatter b into a sequential rhs vector */
    ierr = VecScatterBegin(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mumps->myid) {ierr = VecGetArray(b_seq,&array);CHKERRQ(ierr);}
  } else {  /* petsc_size == 1: MUMPS overwrites the rhs with the solution, so work in x */
    ierr = VecCopy(b,x);CHKERRQ(ierr);
    ierr = VecGetArray(x,&array);CHKERRQ(ierr);
  }
  if (!mumps->myid) { /* define rhs on the host */
    mumps->id.nrhs = 1;
    mumps->id.rhs  = (MumpsScalar*)array;
  }

  /*
     handle condensation step of Schur complement (if any)
     We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
     According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase
     Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
     This requires an extra call to PetscMUMPS_c and the computation of the factors for S
  */
  if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
    if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n");
    second_solve = PETSC_TRUE;
    ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
  }
  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

  /* handle expansion step of Schur complement (if any) */
  if (second_solve) {
    ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
  }

  if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
    if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
      /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */
      ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
    }
    if (!mumps->scat_sol) { /* create scatter scat_sol */
      ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */
      for (i=0; i<mumps->id.lsol_loc; i++) {
        mumps->id.isol_loc[i] -= 1; /* change Fortran style to C style */
      }
      ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,mumps->id.isol_loc,PETSC_COPY_VALUES,&is_petsc);CHKERRQ(ierr);  /* to */
      ierr = VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr);
      ierr = ISDestroy(&is_iden);CHKERRQ(ierr);
      ierr = ISDestroy(&is_petsc);CHKERRQ(ierr);

      mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
    }

    ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
984 ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 985 } 986 987 if (mumps->petsc_size > 1) {if (!mumps->myid) {ierr = VecRestoreArray(b_seq,&array);CHKERRQ(ierr);}} 988 else {ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);} 989 990 ierr = PetscLogFlops(2.0*mumps->id.RINFO(3));CHKERRQ(ierr); 991 PetscFunctionReturn(0); 992 } 993 994 PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x) 995 { 996 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 997 PetscErrorCode ierr; 998 999 PetscFunctionBegin; 1000 mumps->id.ICNTL(9) = 0; 1001 ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr); 1002 mumps->id.ICNTL(9) = 1; 1003 PetscFunctionReturn(0); 1004 } 1005 1006 PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X) 1007 { 1008 PetscErrorCode ierr; 1009 Mat Bt = NULL; 1010 PetscBool flg, flgT; 1011 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1012 PetscInt i,nrhs,M; 1013 PetscScalar *array; 1014 const PetscScalar *rbray; 1015 PetscInt lsol_loc,nlsol_loc,*isol_loc,*idxx,*isol_loc_save,iidx = 0; 1016 PetscScalar *bray,*sol_loc,*sol_loc_save; 1017 IS is_to,is_from; 1018 PetscInt k,proc,j,m,myrstart; 1019 const PetscInt *rstart; 1020 Vec v_mpi,b_seq,msol_loc; 1021 VecScatter scat_rhs,scat_sol; 1022 PetscScalar *aa; 1023 PetscInt spnr,*ia,*ja; 1024 Mat_MPIAIJ *b = NULL; 1025 1026 PetscFunctionBegin; 1027 ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 1028 if (!flg) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix"); 1029 1030 ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 1031 if (flg) { /* dense B */ 1032 if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution"); 1033 mumps->id.ICNTL(20)= 0; /* dense RHS */ 1034 } else { /* sparse B */ 1035 if (X == B) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_IDN,"X and B must be different 
matrices"); 1036 ierr = PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);CHKERRQ(ierr); 1037 if (flgT) { /* input B is transpose of actural RHS matrix, 1038 because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */ 1039 ierr = MatTransposeGetMat(B,&Bt);CHKERRQ(ierr); 1040 } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix"); 1041 mumps->id.ICNTL(20)= 1; /* sparse RHS */ 1042 } 1043 1044 ierr = MatGetSize(B,&M,&nrhs);CHKERRQ(ierr); 1045 mumps->id.nrhs = nrhs; 1046 mumps->id.lrhs = M; 1047 mumps->id.rhs = NULL; 1048 1049 if (mumps->petsc_size == 1) { 1050 PetscScalar *aa; 1051 PetscInt spnr,*ia,*ja; 1052 PetscBool second_solve = PETSC_FALSE; 1053 1054 ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr); 1055 mumps->id.rhs = (MumpsScalar*)array; 1056 1057 if (!Bt) { /* dense B */ 1058 /* copy B to X */ 1059 ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr); 1060 ierr = PetscArraycpy(array,rbray,M*nrhs);CHKERRQ(ierr); 1061 ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr); 1062 } else { /* sparse B */ 1063 ierr = MatSeqAIJGetArray(Bt,&aa);CHKERRQ(ierr); 1064 ierr = MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1065 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 1066 /* mumps requires ia and ja start at 1! 
*/ 1067 mumps->id.irhs_ptr = ia; 1068 mumps->id.irhs_sparse = ja; 1069 mumps->id.nz_rhs = ia[spnr] - 1; 1070 mumps->id.rhs_sparse = (MumpsScalar*)aa; 1071 } 1072 /* handle condensation step of Schur complement (if any) */ 1073 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1074 second_solve = PETSC_TRUE; 1075 ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr); 1076 } 1077 /* solve phase */ 1078 /*-------------*/ 1079 mumps->id.job = JOB_SOLVE; 1080 PetscMUMPS_c(mumps); 1081 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1082 1083 /* handle expansion step of Schur complement (if any) */ 1084 if (second_solve) { 1085 ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr); 1086 } 1087 if (Bt) { /* sparse B */ 1088 ierr = MatSeqAIJRestoreArray(Bt,&aa);CHKERRQ(ierr); 1089 ierr = MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1090 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 1091 } 1092 ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr); 1093 PetscFunctionReturn(0); 1094 } 1095 1096 /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! 
--------*/ 1097 if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n"); 1098 1099 /* create msol_loc to hold mumps local solution */ 1100 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1101 sol_loc_save = (PetscScalar*)mumps->id.sol_loc; 1102 1103 lsol_loc = mumps->id.lsol_loc; 1104 nlsol_loc = nrhs*lsol_loc; /* length of sol_loc */ 1105 ierr = PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);CHKERRQ(ierr); 1106 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1107 mumps->id.isol_loc = isol_loc; 1108 1109 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);CHKERRQ(ierr); 1110 1111 if (!Bt) { /* dense B */ 1112 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1113 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1114 0, re-arrange B into desired order, which is a local operation. 1115 */ 1116 1117 /* scatter v_mpi to b_seq because MUMPS only supports centralized rhs */ 1118 /* wrap dense rhs matrix B into a vector v_mpi */ 1119 ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr); 1120 ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr); 1121 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr); 1122 ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr); 1123 1124 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! 
*/ 1125 if (!mumps->myid) { 1126 PetscInt *idx; 1127 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1128 ierr = PetscMalloc1(nrhs*M,&idx);CHKERRQ(ierr); 1129 ierr = MatGetOwnershipRanges(B,&rstart);CHKERRQ(ierr); 1130 k = 0; 1131 for (proc=0; proc<mumps->petsc_size; proc++){ 1132 for (j=0; j<nrhs; j++){ 1133 for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i; 1134 } 1135 } 1136 1137 ierr = VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);CHKERRQ(ierr); 1138 ierr = ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr); 1139 ierr = ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);CHKERRQ(ierr); 1140 } else { 1141 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);CHKERRQ(ierr); 1142 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);CHKERRQ(ierr); 1143 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);CHKERRQ(ierr); 1144 } 1145 ierr = VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);CHKERRQ(ierr); 1146 ierr = VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1147 ierr = ISDestroy(&is_to);CHKERRQ(ierr); 1148 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1149 ierr = VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1150 1151 if (!mumps->myid) { /* define rhs on the host */ 1152 ierr = VecGetArray(b_seq,&bray);CHKERRQ(ierr); 1153 mumps->id.rhs = (MumpsScalar*)bray; 1154 ierr = VecRestoreArray(b_seq,&bray);CHKERRQ(ierr); 1155 } 1156 1157 } else { /* sparse B */ 1158 b = (Mat_MPIAIJ*)Bt->data; 1159 1160 /* wrap dense X into a vector v_mpi */ 1161 ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr); 1162 ierr = MatDenseGetArray(X,&bray);CHKERRQ(ierr); 1163 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr); 1164 ierr = MatDenseRestoreArray(X,&bray);CHKERRQ(ierr); 1165 1166 if (!mumps->myid) { 1167 ierr = MatSeqAIJGetArray(b->A,&aa);CHKERRQ(ierr); 1168 ierr = 
MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1169 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 1170 /* mumps requires ia and ja start at 1! */ 1171 mumps->id.irhs_ptr = ia; 1172 mumps->id.irhs_sparse = ja; 1173 mumps->id.nz_rhs = ia[spnr] - 1; 1174 mumps->id.rhs_sparse = (MumpsScalar*)aa; 1175 } else { 1176 mumps->id.irhs_ptr = NULL; 1177 mumps->id.irhs_sparse = NULL; 1178 mumps->id.nz_rhs = 0; 1179 mumps->id.rhs_sparse = NULL; 1180 } 1181 } 1182 1183 /* solve phase */ 1184 /*-------------*/ 1185 mumps->id.job = JOB_SOLVE; 1186 PetscMUMPS_c(mumps); 1187 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1188 1189 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1190 ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr); 1191 ierr = VecPlaceArray(v_mpi,array);CHKERRQ(ierr); 1192 1193 /* create scatter scat_sol */ 1194 ierr = MatGetOwnershipRanges(X,&rstart);CHKERRQ(ierr); 1195 /* iidx: index for scatter mumps solution to petsc X */ 1196 1197 ierr = ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);CHKERRQ(ierr); 1198 ierr = PetscMalloc1(nlsol_loc,&idxx);CHKERRQ(ierr); 1199 for (i=0; i<lsol_loc; i++) { 1200 isol_loc[i] -= 1; /* change Fortran style to C style. 
isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1201 1202 for (proc=0; proc<mumps->petsc_size; proc++){ 1203 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) { 1204 myrstart = rstart[proc]; 1205 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1206 iidx = k + myrstart*nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1207 m = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */ 1208 break; 1209 } 1210 } 1211 1212 for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m; 1213 } 1214 ierr = ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr); 1215 ierr = VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);CHKERRQ(ierr); 1216 ierr = VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1217 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1218 ierr = ISDestroy(&is_to);CHKERRQ(ierr); 1219 ierr = VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1220 ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr); 1221 1222 /* free spaces */ 1223 mumps->id.sol_loc = (MumpsScalar*)sol_loc_save; 1224 mumps->id.isol_loc = isol_loc_save; 1225 1226 ierr = PetscFree2(sol_loc,isol_loc);CHKERRQ(ierr); 1227 ierr = PetscFree(idxx);CHKERRQ(ierr); 1228 ierr = VecDestroy(&msol_loc);CHKERRQ(ierr); 1229 ierr = VecDestroy(&v_mpi);CHKERRQ(ierr); 1230 if (Bt) { 1231 if (!mumps->myid) { 1232 b = (Mat_MPIAIJ*)Bt->data; 1233 ierr = MatSeqAIJRestoreArray(b->A,&aa);CHKERRQ(ierr); 1234 ierr = MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1235 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 1236 } 1237 } else { 1238 ierr = VecDestroy(&b_seq);CHKERRQ(ierr); 1239 ierr = VecScatterDestroy(&scat_rhs);CHKERRQ(ierr); 1240 } 1241 ierr = VecScatterDestroy(&scat_sol);CHKERRQ(ierr); 1242 ierr = 
PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));CHKERRQ(ierr); 1243 PetscFunctionReturn(0); 1244 } 1245 1246 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X) 1247 { 1248 PetscErrorCode ierr; 1249 PetscBool flg; 1250 Mat B; 1251 1252 PetscFunctionBegin; 1253 ierr = PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 1254 if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix"); 1255 1256 /* Create B=Bt^T that uses Bt's data structure */ 1257 ierr = MatCreateTranspose(Bt,&B);CHKERRQ(ierr); 1258 1259 ierr = MatMatSolve_MUMPS(A,B,X);CHKERRQ(ierr); 1260 ierr = MatDestroy(&B);CHKERRQ(ierr); 1261 PetscFunctionReturn(0); 1262 } 1263 1264 #if !defined(PETSC_USE_COMPLEX) 1265 /* 1266 input: 1267 F: numeric factor 1268 output: 1269 nneg: total number of negative pivots 1270 nzero: total number of zero pivots 1271 npos: (global dimension of F) - nneg - nzero 1272 */ 1273 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos) 1274 { 1275 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1276 PetscErrorCode ierr; 1277 PetscMPIInt size; 1278 1279 PetscFunctionBegin; 1280 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRQ(ierr); 1281 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1282 if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. 
-mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",mumps->id.INFOG(13)); 1283 1284 if (nneg) *nneg = mumps->id.INFOG(12); 1285 if (nzero || npos) { 1286 if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1287 if (nzero) *nzero = mumps->id.INFOG(28); 1288 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1289 } 1290 PetscFunctionReturn(0); 1291 } 1292 #endif 1293 1294 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps) 1295 { 1296 PetscErrorCode ierr; 1297 PetscInt i,nz=0,*irn,*jcn=0; 1298 PetscScalar *val=0; 1299 PetscMPIInt mpinz,*recvcount=NULL,*displs=NULL; 1300 1301 PetscFunctionBegin; 1302 if (mumps->omp_comm_size > 1) { 1303 if (reuse == MAT_INITIAL_MATRIX) { 1304 /* master first gathers counts of nonzeros to receive */ 1305 if (mumps->is_omp_master) { ierr = PetscMalloc2(mumps->omp_comm_size,&recvcount,mumps->omp_comm_size,&displs);CHKERRQ(ierr); } 1306 ierr = PetscMPIIntCast(mumps->nz,&mpinz);CHKERRQ(ierr); 1307 ierr = MPI_Gather(&mpinz,1,MPI_INT,recvcount,1,MPI_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1308 1309 /* master allocates memory to receive nonzeros */ 1310 if (mumps->is_omp_master) { 1311 displs[0] = 0; 1312 for (i=1; i<mumps->omp_comm_size; i++) displs[i] = displs[i-1] + recvcount[i-1]; 1313 nz = displs[mumps->omp_comm_size-1] + recvcount[mumps->omp_comm_size-1]; 1314 ierr = PetscMalloc(2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar),&irn);CHKERRQ(ierr); 1315 jcn = irn + nz; 1316 val = (PetscScalar*)(jcn + nz); 1317 } 1318 1319 /* save the gatherv plan */ 1320 mumps->mpinz = mpinz; /* used as send count */ 1321 mumps->recvcount = recvcount; 1322 mumps->displs = displs; 1323 1324 /* master gathers nonzeros */ 1325 ierr = MPI_Gatherv(mumps->irn,mpinz,MPIU_INT,irn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1326 ierr = 
MPI_Gatherv(mumps->jcn,mpinz,MPIU_INT,jcn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1327 ierr = MPI_Gatherv(mumps->val,mpinz,MPIU_SCALAR,val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1328 1329 /* master frees its row/col/val and replaces them with bigger arrays */ 1330 if (mumps->is_omp_master) { 1331 ierr = PetscFree(mumps->irn);CHKERRQ(ierr); /* irn/jcn/val are allocated together so free only irn */ 1332 mumps->nz = nz; /* it is a sum of mpinz over omp_comm */ 1333 mumps->irn = irn; 1334 mumps->jcn = jcn; 1335 mumps->val = val; 1336 } 1337 } else { 1338 ierr = MPI_Gatherv((mumps->is_omp_master?MPI_IN_PLACE:mumps->val),mumps->mpinz,MPIU_SCALAR,mumps->val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1339 } 1340 } 1341 PetscFunctionReturn(0); 1342 } 1343 1344 PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info) 1345 { 1346 Mat_MUMPS *mumps =(Mat_MUMPS*)(F)->data; 1347 PetscErrorCode ierr; 1348 PetscBool isMPIAIJ; 1349 1350 PetscFunctionBegin; 1351 if (mumps->id.INFOG(1) < 0) { 1352 if (mumps->id.INFOG(1) == -6) { 1353 ierr = PetscInfo2(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1354 } 1355 ierr = PetscInfo2(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1356 PetscFunctionReturn(0); 1357 } 1358 1359 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1360 ierr = MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);CHKERRQ(ierr); 1361 1362 /* numerical factorization phase */ 1363 /*-------------------------------*/ 1364 mumps->id.job = JOB_FACTNUMERIC; 1365 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1366 if (!mumps->myid) { 1367 mumps->id.a = 
(MumpsScalar*)mumps->val; 1368 } 1369 } else { 1370 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1371 } 1372 PetscMUMPS_c(mumps); 1373 if (mumps->id.INFOG(1) < 0) { 1374 if (A->erroriffailure) { 1375 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 1376 } else { 1377 if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */ 1378 ierr = PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1379 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1380 } else if (mumps->id.INFOG(1) == -13) { 1381 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1382 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1383 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10) ) { 1384 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray \n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1385 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1386 } else { 1387 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1388 F->factorerrortype = MAT_FACTOR_OTHER; 1389 } 1390 } 1391 } 1392 if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d\n",mumps->id.INFOG(16)); 1393 1394 F->assembled = PETSC_TRUE; 1395 mumps->matstruc = SAME_NONZERO_PATTERN; 1396 if (F->schur) { /* reset Schur status to unfactored */ 1397 #if defined(PETSC_HAVE_CUDA) 1398 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 1399 #endif 1400 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1401 mumps->id.ICNTL(19) = 2; 1402 ierr = 
MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);CHKERRQ(ierr); 1403 } 1404 ierr = MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);CHKERRQ(ierr); 1405 } 1406 1407 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 1408 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 1409 1410 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 1411 if (mumps->petsc_size > 1) { 1412 PetscInt lsol_loc; 1413 PetscScalar *sol_loc; 1414 1415 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr); 1416 1417 /* distributed solution; Create x_seq=sol_loc for repeated use */ 1418 if (mumps->x_seq) { 1419 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 1420 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 1421 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 1422 } 1423 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1424 ierr = PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);CHKERRQ(ierr); 1425 mumps->id.lsol_loc = lsol_loc; 1426 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1427 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr); 1428 } 1429 ierr = PetscLogFlops(mumps->id.RINFO(2));CHKERRQ(ierr); 1430 PetscFunctionReturn(0); 1431 } 1432 1433 /* Sets MUMPS options from the options database */ 1434 PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A) 1435 { 1436 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1437 PetscErrorCode ierr; 1438 PetscInt icntl,info[80],i,ninfo=80; 1439 PetscBool flg; 1440 1441 PetscFunctionBegin; 1442 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 1443 ierr = PetscOptionsInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr); 1444 if (flg) mumps->id.ICNTL(1) = icntl; 1445 ierr = 
PetscOptionsInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr); 1446 if (flg) mumps->id.ICNTL(2) = icntl; 1447 ierr = PetscOptionsInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr); 1448 if (flg) mumps->id.ICNTL(3) = icntl; 1449 1450 ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr); 1451 if (flg) mumps->id.ICNTL(4) = icntl; 1452 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 1453 1454 ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr); 1455 if (flg) mumps->id.ICNTL(6) = icntl; 1456 1457 ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 
3=Scotch, 4=PORD, 5=Metis","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr); 1458 if (flg) { 1459 if (icntl== 1 && mumps->petsc_size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n"); 1460 else mumps->id.ICNTL(7) = icntl; 1461 } 1462 1463 ierr = PetscOptionsInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr); 1464 /* ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL);CHKERRQ(ierr); handled by MatSolveTranspose_MUMPS() */ 1465 ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr); 1466 ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr); 1467 ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr); 1468 ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr); 1469 ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr); 1470 ierr = PetscOptionsInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr); 1471 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 1472 ierr = MatDestroy(&F->schur);CHKERRQ(ierr); 1473 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 1474 } 1475 /* ierr = 
PetscOptionsInt("-mat_mumps_icntl_20","ICNTL(20): the format (dense or sparse) of the right-hand sides","None",mumps->id.ICNTL(20),&mumps->id.ICNTL(20),NULL);CHKERRQ(ierr); -- sparse rhs is not supported in PETSc API */ 1476 /* ierr = PetscOptionsInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL);CHKERRQ(ierr); we only use distributed solution vector */ 1477 1478 ierr = PetscOptionsInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr); 1479 ierr = PetscOptionsInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr); 1480 ierr = PetscOptionsInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr); 1481 if (mumps->id.ICNTL(24)) { 1482 mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ 1483 } 1484 1485 ierr = PetscOptionsInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr); 1486 ierr = PetscOptionsInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr); 1487 ierr = PetscOptionsInt("-mat_mumps_icntl_27","ICNTL(27): the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr); 1488 ierr = PetscOptionsInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr); 1489 ierr = 
PetscOptionsInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr); 1490 /* ierr = PetscOptionsInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */ 1491 ierr = PetscOptionsInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr); 1492 /* ierr = PetscOptionsInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr); -- not supported by PETSc API */ 1493 ierr = PetscOptionsInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr); 1494 ierr = PetscOptionsInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr); 1495 ierr = PetscOptionsInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);CHKERRQ(ierr); 1496 ierr = PetscOptionsInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);CHKERRQ(ierr); 1497 1498 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr); 1499 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr); 1500 ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr); 1501 ierr = 
PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr); 1502 ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr); 1503 ierr = PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);CHKERRQ(ierr); 1504 1505 ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, 256, NULL);CHKERRQ(ierr); 1506 1507 ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr); 1508 if (ninfo) { 1509 if (ninfo > 80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %d must <= 80\n",ninfo); 1510 ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr); 1511 mumps->ninfo = ninfo; 1512 for (i=0; i<ninfo; i++) { 1513 if (info[i] < 0 || info[i]>80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %d must between 1 and 80\n",ninfo); 1514 else mumps->info[i] = info[i]; 1515 } 1516 } 1517 1518 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1519 PetscFunctionReturn(0); 1520 } 1521 1522 PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps) 1523 { 1524 PetscErrorCode ierr; 1525 PetscInt nthreads=0; 1526 1527 PetscFunctionBegin; 1528 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1529 ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRQ(ierr); 1530 ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRQ(ierr); /* so that code like "if (!myid)" still works even if mumps_comm is different */ 1531 1532 ierr = PetscOptionsHasName(NULL,NULL,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr); 1533 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1534 ierr = 
PetscOptionsGetInt(NULL,NULL,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr); 1535 if (mumps->use_petsc_omp_support) { 1536 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1537 ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr); 1538 ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr); 1539 #else 1540 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -mat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual\n"); 1541 #endif 1542 } else { 1543 mumps->omp_comm = PETSC_COMM_SELF; 1544 mumps->mumps_comm = mumps->petsc_comm; 1545 mumps->is_omp_master = PETSC_TRUE; 1546 } 1547 ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRQ(ierr); 1548 1549 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 1550 mumps->id.job = JOB_INIT; 1551 mumps->id.par = 1; /* host participates factorizaton and solve */ 1552 mumps->id.sym = mumps->sym; 1553 1554 PetscMUMPS_c(mumps); 1555 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in PetscInitializeMUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1556 1557 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 1558 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 
1559 */ 1560 ierr = MPI_Bcast(mumps->id.icntl,40,MPI_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 Manual Section 9 */ 1561 ierr = MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); 1562 1563 mumps->scat_rhs = NULL; 1564 mumps->scat_sol = NULL; 1565 1566 /* set PETSc-MUMPS default options - override MUMPS default */ 1567 mumps->id.ICNTL(3) = 0; 1568 mumps->id.ICNTL(4) = 0; 1569 if (mumps->petsc_size == 1) { 1570 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 1571 } else { 1572 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 1573 mumps->id.ICNTL(20) = 0; /* rhs is in dense format */ 1574 mumps->id.ICNTL(21) = 1; /* distributed solution */ 1575 } 1576 1577 /* schur */ 1578 mumps->id.size_schur = 0; 1579 mumps->id.listvar_schur = NULL; 1580 mumps->id.schur = NULL; 1581 mumps->sizeredrhs = 0; 1582 mumps->schur_sol = NULL; 1583 mumps->schur_sizesol = 0; 1584 PetscFunctionReturn(0); 1585 } 1586 1587 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps) 1588 { 1589 PetscErrorCode ierr; 1590 1591 PetscFunctionBegin; 1592 if (mumps->id.INFOG(1) < 0) { 1593 if (A->erroriffailure) { 1594 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1595 } else { 1596 if (mumps->id.INFOG(1) == -6) { 1597 ierr = PetscInfo2(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1598 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 1599 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 1600 ierr = PetscInfo2(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1601 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1602 } else { 1603 ierr = PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1604 
        F->factorerrortype = MAT_FACTOR_OTHER;
      }
    }
  }
  PetscFunctionReturn(0);
}

/* Note Petsc r(=c) permutation is used when mumps->id.ICNTL(7)==1 with centralized assembled matrix input; otherwise r and c are ignored */
/*
  LU symbolic factorization (MUMPS analysis phase, job=1) for AIJ matrices.
  Converts A to MUMPS triples, hands the pattern to MUMPS, and installs the
  numeric/solve function pointers on F.  F's interface is a standard PETSc
  MatLUFactorSymbolic implementation.
*/
PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
  PetscErrorCode ierr;
  Vec            b;
  const PetscInt M = A->rmap->N;

  PetscFunctionBegin;
  mumps->matstruc = DIFFERENT_NONZERO_PATTERN;

  /* Set MUMPS options from the options database */
  ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr);

  ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr);
  ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr);

  /* analysis phase */
  /*----------------*/
  mumps->id.job = JOB_FACTSYMBOLIC;
  mumps->id.n   = M;
  switch (mumps->id.ICNTL(18)) {
  case 0: /* centralized assembled matrix input: only the host passes the triples */
    if (!mumps->myid) {
      mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn;
      if (mumps->id.ICNTL(6)>1) {
        /* ICNTL(6)>1 scalings need the numerical values already at analysis */
        mumps->id.a = (MumpsScalar*)mumps->val;
      }
      if (mumps->id.ICNTL(7) == 1) { /* use user-provide matrix ordering - assuming r = c ordering */
        /*
        PetscBool flag;
        ierr = ISEqual(r,c,&flag);CHKERRQ(ierr);
        if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"row_perm != col_perm");
        ierr = ISView(r,PETSC_VIEWER_STDOUT_SELF);
        */
        if (!mumps->myid) {
          const PetscInt *idx;
          PetscInt       i,*perm_in;

          ierr = PetscMalloc1(M,&perm_in);CHKERRQ(ierr);
          ierr = ISGetIndices(r,&idx);CHKERRQ(ierr);

          mumps->id.perm_in = perm_in;
          for (i=0; i<M; i++) perm_in[i] = idx[i]+1; /* perm_in[]: start from 1, not 0! */
          ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr);
        }
      }
    }
    break;
  case 3: /* distributed assembled matrix input (size>1): every rank passes its local triples */
    mumps->id.nz_loc = mumps->nz;
    mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn;
    if (mumps->id.ICNTL(6)>1) {
      mumps->id.a_loc = (MumpsScalar*)mumps->val;
    }
    /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
    ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr);
    ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr);
    ierr = VecDestroy(&b);CHKERRQ(ierr);
    break;
  }
  PetscMUMPS_c(mumps); /* run MUMPS analysis; collective over the MUMPS communicator */
  ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr);

  F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
  F->ops->solve             = MatSolve_MUMPS;
  F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
  F->ops->matsolve          = MatMatSolve_MUMPS;
  F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
  PetscFunctionReturn(0);
}

/* Note the Petsc r and c permutations are ignored */
/*
  LU symbolic factorization for BAIJ matrices.  Identical to the AIJ variant except
  that the user-ordering (ICNTL(7)==1) path and the MatMatSolve hooks are not set up.
*/
PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
  PetscErrorCode ierr;
  Vec            b;
  const PetscInt M = A->rmap->N;

  PetscFunctionBegin;
  mumps->matstruc = DIFFERENT_NONZERO_PATTERN;

  /* Set MUMPS options from the options database */
  ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr);

  ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr);
  ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr);

  /* analysis phase */
  /*----------------*/
  mumps->id.job = JOB_FACTSYMBOLIC;
  mumps->id.n   = M;
  switch (mumps->id.ICNTL(18)) {
  case 0: /* centralized assembled matrix input */
    if (!mumps->myid) {
      mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn;
      if (mumps->id.ICNTL(6)>1) {
        mumps->id.a = (MumpsScalar*)mumps->val;
      }
    }
    break;
  case 3: /* distributed assembled matrix input (size>1) */
    mumps->id.nz_loc = mumps->nz;
    mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn;
    if (mumps->id.ICNTL(6)>1) {
      mumps->id.a_loc = (MumpsScalar*)mumps->val;
    }
    /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
    ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr);
    ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr);
    ierr = VecDestroy(&b);CHKERRQ(ierr);
    break;
  }
  PetscMUMPS_c(mumps);
  ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr);

  F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
  F->ops->solve           = MatSolve_MUMPS;
  F->ops->solvetranspose  = MatSolveTranspose_MUMPS;
  PetscFunctionReturn(0);
}

/* Note the Petsc r permutation and factor info are ignored */
/*
  Cholesky symbolic factorization (MUMPS analysis, job=1) for symmetric matrices.
*/
PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info)
{
  Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
  PetscErrorCode ierr;
  Vec            b;
  const PetscInt M = A->rmap->N;

  PetscFunctionBegin;
  mumps->matstruc = DIFFERENT_NONZERO_PATTERN;

  /* Set MUMPS options from the options database */
  ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr);

  ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr);
  ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr);

  /* analysis phase */
  /*----------------*/
  mumps->id.job = JOB_FACTSYMBOLIC;
  mumps->id.n   = M;
  switch (mumps->id.ICNTL(18)) {
  case 0: /* centralized assembled matrix input */
    if (!mumps->myid) {
      mumps->id.nz =mumps->nz;
      mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn;
      if (mumps->id.ICNTL(6)>1) {
        mumps->id.a = (MumpsScalar*)mumps->val;
      }
    }
    break;
  case 3: /* distributed assembled matrix input (size>1) */
    mumps->id.nz_loc = mumps->nz;
    mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn;
    if (mumps->id.ICNTL(6)>1) {
      mumps->id.a_loc = (MumpsScalar*)mumps->val;
    }
    /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
    ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr);
    ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr);
    ierr = VecDestroy(&b);CHKERRQ(ierr);
    break;
  }
  PetscMUMPS_c(mumps);
  ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr);

  F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
  F->ops->solve                 = MatSolve_MUMPS;
  F->ops->solvetranspose        = MatSolve_MUMPS; /* symmetric factor: transpose solve == solve */
  F->ops->matsolve              = MatMatSolve_MUMPS;
  F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
#if defined(PETSC_USE_COMPLEX)
  /* inertia is only meaningful for real symmetric matrices */
  F->ops->getinertia = NULL;
#else
  F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
#endif
  PetscFunctionReturn(0);
}

/*
  ASCII viewer for MUMPS factor matrices: with PETSC_VIEWER_ASCII_INFO format,
  prints the ICNTL/CNTL control parameters and the INFO/INFOG/RINFO/RINFOG
  statistics gathered by MUMPS.
*/
PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;
  Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;

  PetscFunctionBegin;
  /* check if matrix is mumps type */
  if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0);

  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",mumps->id.sym);CHKERRQ(ierr);
      /* control parameters as set on this factor (see MUMPS Users' Guide for the full semantics) */
      ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",mumps->id.par);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d \n",mumps->id.ICNTL(1));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d \n",mumps->id.ICNTL(2));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d \n",mumps->id.ICNTL(3));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",mumps->id.ICNTL(4));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",mumps->id.ICNTL(5));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",mumps->id.ICNTL(6));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d \n",mumps->id.ICNTL(7));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d \n",mumps->id.ICNTL(8));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",mumps->id.ICNTL(10));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",mumps->id.ICNTL(11));CHKERRQ(ierr);
      if (mumps->id.ICNTL(11)>0) {
        /* error-analysis statistics are only valid when ICNTL(11) was enabled for the solve */
        ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr);
      }
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",mumps->id.ICNTL(12));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (efficiency control): %d \n",mumps->id.ICNTL(13));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",mumps->id.ICNTL(14));CHKERRQ(ierr);
      /* ICNTL(15-17) not used */
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",mumps->id.ICNTL(18));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d \n",mumps->id.ICNTL(19));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(20) (rhs sparse pattern): %d \n",mumps->id.ICNTL(20));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d \n",mumps->id.ICNTL(21));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d \n",mumps->id.ICNTL(22));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory can be allocated locally):%d \n",mumps->id.ICNTL(23));CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d \n",mumps->id.ICNTL(24));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d \n",mumps->id.ICNTL(25));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for rhs or solution): %d \n",mumps->id.ICNTL(26));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(27) (experimental parameter): %d \n",mumps->id.ICNTL(27));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d \n",mumps->id.ICNTL(28));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d \n",mumps->id.ICNTL(29));CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d \n",mumps->id.ICNTL(30));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(31) (factors is discarded in the solve phase): %d \n",mumps->id.ICNTL(31));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d \n",mumps->id.ICNTL(33));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d \n",mumps->id.ICNTL(35));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(36) (choice of BLR factorization variant): %d \n",mumps->id.ICNTL(36));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(38) (estimated compression rate of LU factors): %d \n",mumps->id.ICNTL(38));CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," CNTL(7) (dropping parameter for BLR): %g \n",mumps->id.CNTL(7));CHKERRQ(ierr);

      /* information local to each processor */
      ierr = PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] %g \n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr);
      /* synchronized prints: every rank contributes its local value, flushed per statistic */
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] %d \n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] %d \n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] %d \n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);

      /* user-requested additional INFO entries (set via ninfo/info; bounded at 80 entries) */
      if (mumps->ninfo && mumps->ninfo <= 80){
        PetscInt i;
        for (i=0; i<mumps->ninfo; i++){
          ierr = PetscViewerASCIIPrintf(viewer, " INFO(%d): \n",mumps->info[i]);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] %d \n",mumps->myid,mumps->id.INFO(mumps->info[i]));CHKERRQ(ierr);
          ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
        }
      }
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);

      if (!mumps->myid) { /* information from the host */
        ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr);

        ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(3));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(4));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",mumps->id.INFOG(5));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",mumps->id.INFOG(6));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively use after analysis): %d \n",mumps->id.INFOG(7));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",mumps->id.INFOG(8));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",mumps->id.INFOG(9));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d \n",mumps->id.INFOG(10));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d \n",mumps->id.INFOG(11));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",mumps->id.INFOG(12));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",mumps->id.INFOG(13));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",mumps->id.INFOG(14));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",mumps->id.INFOG(15));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",mumps->id.INFOG(16));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",mumps->id.INFOG(17));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",mumps->id.INFOG(18));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",mumps->id.INFOG(19));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",mumps->id.INFOG(20));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",mumps->id.INFOG(21));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",mumps->id.INFOG(22));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",mumps->id.INFOG(23));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",mumps->id.INFOG(24));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",mumps->id.INFOG(25));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(36));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d \n",mumps->id.INFOG(37));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(38));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d \n",mumps->id.INFOG(39));CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
  MatGetInfo implementation: MUMPS only exposes the estimated number of factor
  entries (INFOG(20)); all other MatInfo fields are reported as zero/unknown.
*/
PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)A->data;

  PetscFunctionBegin;
  info->block_size        = 1.0;
  info->nz_allocated      = mumps->id.INFOG(20);
  info->nz_used           = mumps->id.INFOG(20);
  info->nz_unneeded       = 0.0;
  info->assemblies        = 0.0;
  info->mallocs           = 0.0;
  info->memory            = 0.0;
  info->fill_ratio_given  = 0;
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------------------------*/
/*
  Register the index set defining the Schur complement with MUMPS.  All Schur indices
  must live on the root rank (distributed Schur complements are not supported); the
  dense Schur matrix F->schur is allocated here and MUMPS writes into its array.
*/
PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
{
  Mat_MUMPS         *mumps =(Mat_MUMPS*)F->data;
  const PetscScalar *arr;
  const PetscInt    *idxs;
  PetscInt          size,i;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr);
  if (mumps->petsc_size > 1) {
    PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */

    ls   = mumps->myid ? (size ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
    ierr = MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);CHKERRQ(ierr);
    if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc\n");
  }

  /* Schur complement matrix */
  ierr = MatDestroy(&F->schur);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur);CHKERRQ(ierr);
  /* NOTE(review): the pointer into the dense array is kept in mumps->id.schur after
     RestoreArrayRead (const is cast away so MUMPS can write into it) -- relies on the
     SeqDense array staying in place for the lifetime of F->schur; confirm */
  ierr = MatDenseGetArrayRead(F->schur,&arr);CHKERRQ(ierr);
  mumps->id.schur      = (MumpsScalar*)arr;
  mumps->id.size_schur = size;
  mumps->id.schur_lld  = size;
  ierr = MatDenseRestoreArrayRead(F->schur,&arr);CHKERRQ(ierr);
  if (mumps->sym == 1) {
    /* sym==1 means MUMPS was told the matrix is SPD; propagate to the Schur complement */
    ierr = MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);
  }

  /* MUMPS expects Fortran style indices */
  ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&mumps->id.listvar_schur);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr);
  ierr = PetscArraycpy(mumps->id.listvar_schur,idxs,size);CHKERRQ(ierr);
  for (i=0;i<size;i++) mumps->id.listvar_schur[i]++;
  ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr);
  if (mumps->petsc_size > 1) {
    mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */
  } else {
    if (F->factortype == MAT_FACTOR_LU) {
      mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
    } else {
      mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
    }
  }
  /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
  mumps->id.ICNTL(26) = -1;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------------------------*/
/*
  Copy the Schur complement computed by MUMPS into a new SeqDense matrix *S
  (column-major PETSc storage), expanding/transposing according to how MUMPS
  returned it (ICNTL(19): 1 = by rows / lower-by-rows, 2 = lower triangular
  by columns, 3 = full by columns).  Caller owns *S.
*/
PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S)
{
  Mat            St;
  Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
  PetscScalar    *array;
#if defined(PETSC_USE_COMPLEX)
  PetscScalar    im = PetscSqrtScalar((PetscScalar)-1.0); /* imaginary unit, to rebuild PetscScalar from MUMPS (r,i) pairs */
#endif
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
  ierr = MatCreate(PETSC_COMM_SELF,&St);CHKERRQ(ierr);
  ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);CHKERRQ(ierr);
  ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(St);CHKERRQ(ierr);
  ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr);
  if (!mumps->sym) { /* MUMPS always return a full matrix */
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows: transpose into PETSc's column-major array */
      PetscInt i,j,N=mumps->id.size_schur;
      for (i=0;i<N;i++) {
        for (j=0;j<N;j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i*N+j];
#else
          PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
#endif
          array[j*N+i] = val;
        }
      }
    } else { /* stored by columns */
      ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr);
    }
  } else { /* either full or lower-triangular (not packed) */
    if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns: mirror into both triangles */
      PetscInt i,j,N=mumps->id.size_schur;
      for (i=0;i<N;i++) {
        for (j=i;j<N;j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i*N+j];
#else
          PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
#endif
          array[i*N+j] = val;
          array[j*N+i] = val;
        }
      }
    } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
      ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr);
    } else { /* ICNTL(19) == 1 lower triangular stored by rows: mirror into both triangles */
      PetscInt i,j,N=mumps->id.size_schur;
      for (i=0;i<N;i++) {
        for (j=0;j<i+1;j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i*N+j];
#else
          PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
#endif
          array[i*N+j] = val;
          array[j*N+i] = val;
        }
      }
    }
  }
  ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr);
  *S   = St;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------------------------*/
/* Implementation behind MatMumpsSetIcntl(): store ival in ICNTL(icntl). No range checking here. */
PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  mumps->id.ICNTL(icntl) = ival;
  PetscFunctionReturn(0);
}

/* Implementation behind MatMumpsGetIcntl(): read ICNTL(icntl). */
PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  *ival = mumps->id.ICNTL(icntl);
  PetscFunctionReturn(0);
}

/*@
  MatMumpsSetIcntl - Set MUMPS parameter ICNTL()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
.  icntl - index of MUMPS parameter array ICNTL()
-  ival - value of MUMPS ICNTL(icntl)

  Options Database:
.   -mat_mumps_icntl_<icntl> <ival>

   Level: beginner

   References:
.     MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidLogicalCollectiveInt(F,icntl,2);
  PetscValidLogicalCollectiveInt(F,ival,3);
  ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
  MatMumpsGetIcntl - Get MUMPS parameter ICNTL()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  icntl - index of MUMPS parameter array ICNTL()

  Output Parameter:
.  ival - value of MUMPS ICNTL(icntl)

   Level: beginner

   References:
.     MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidLogicalCollectiveInt(F,icntl,2);
  PetscValidIntPointer(ival,3);
  ierr = PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------------------------*/
/* Implementation behind MatMumpsSetCntl(): store val in CNTL(icntl). */
PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  mumps->id.CNTL(icntl) = val;
  PetscFunctionReturn(0);
}

/* Implementation behind MatMumpsGetCntl(): read CNTL(icntl). */
PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  *val = mumps->id.CNTL(icntl);
  PetscFunctionReturn(0);
}

/*@
  MatMumpsSetCntl - Set MUMPS parameter CNTL()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
.  icntl - index of MUMPS parameter array CNTL()
-  val - value of MUMPS CNTL(icntl)

  Options Database:
.   -mat_mumps_cntl_<icntl> <val>

   Level: beginner

   References:
.     MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidLogicalCollectiveInt(F,icntl,2);
  PetscValidLogicalCollectiveReal(F,val,3);
  ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
  MatMumpsGetCntl - Get MUMPS parameter CNTL()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  icntl - index of MUMPS parameter array CNTL()

  Output Parameter:
.  val - value of MUMPS CNTL(icntl)

   Level: beginner

   References:
.      MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidLogicalCollectiveInt(F,icntl,2);
  PetscValidRealPointer(val,3);
  ierr = PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Implementation behind MatMumpsGetInfo(): read the local INFO(icntl) statistic. */
PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  *info = mumps->id.INFO(icntl);
  PetscFunctionReturn(0);
}

/* Implementation behind MatMumpsGetInfog(): read the global INFOG(icntl) statistic. */
PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  *infog = mumps->id.INFOG(icntl);
  PetscFunctionReturn(0);
}

/* Implementation behind MatMumpsGetRinfo(): read the local real RINFO(icntl) statistic. */
PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  *rinfo = mumps->id.RINFO(icntl);
  PetscFunctionReturn(0);
}

/* Implementation behind MatMumpsGetRinfog(): read the global real RINFOG(icntl) statistic. */
PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  *rinfog = mumps->id.RINFOG(icntl);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS)
{
  PetscErrorCode ierr;
  Mat            Bt = NULL,Btseq = NULL;
  PetscBool      flg;
  Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
  PetscScalar    *aa;
  PetscInt       spnr,*ia,*ja,M,nrhs;

  PetscFunctionBegin;
PetscValidIntPointer(spRHS,2); 2295 ierr = PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);CHKERRQ(ierr); 2296 if (flg) { 2297 ierr = MatTransposeGetMat(spRHS,&Bt);CHKERRQ(ierr); 2298 } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be type MATTRANSPOSEMAT matrix"); 2299 2300 ierr = MatMumpsSetIcntl(F,30,1);CHKERRQ(ierr); 2301 2302 if (mumps->petsc_size > 1) { 2303 Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data; 2304 Btseq = b->A; 2305 } else { 2306 Btseq = Bt; 2307 } 2308 2309 ierr = MatGetSize(spRHS,&M,&nrhs);CHKERRQ(ierr); 2310 mumps->id.nrhs = nrhs; 2311 mumps->id.lrhs = M; 2312 mumps->id.rhs = NULL; 2313 2314 if (!mumps->myid) { 2315 ierr = MatSeqAIJGetArray(Btseq,&aa);CHKERRQ(ierr); 2316 ierr = MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2317 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2318 2319 mumps->id.irhs_ptr = ia; 2320 mumps->id.irhs_sparse = ja; 2321 mumps->id.nz_rhs = ia[spnr] - 1; 2322 mumps->id.rhs_sparse = (MumpsScalar*)aa; 2323 } else { 2324 mumps->id.irhs_ptr = NULL; 2325 mumps->id.irhs_sparse = NULL; 2326 mumps->id.nz_rhs = 0; 2327 mumps->id.rhs_sparse = NULL; 2328 } 2329 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2330 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2331 2332 /* solve phase */ 2333 /*-------------*/ 2334 mumps->id.job = JOB_SOLVE; 2335 PetscMUMPS_c(mumps); 2336 if (mumps->id.INFOG(1) < 0) 2337 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 2338 2339 if (!mumps->myid) { 2340 ierr = MatSeqAIJRestoreArray(Btseq,&aa);CHKERRQ(ierr); 2341 ierr = MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2342 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2343 } 
2344 PetscFunctionReturn(0); 2345 } 2346 2347 /*@ 2348 MatMumpsGetInverse - Get user-specified set of entries in inverse of A 2349 2350 Logically Collective on Mat 2351 2352 Input Parameters: 2353 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2354 - spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0] 2355 2356 Output Parameter: 2357 . spRHS - requested entries of inverse of A 2358 2359 Level: beginner 2360 2361 References: 2362 . MUMPS Users' Guide 2363 2364 .seealso: MatGetFactor(), MatCreateTranspose() 2365 @*/ 2366 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS) 2367 { 2368 PetscErrorCode ierr; 2369 2370 PetscFunctionBegin; 2371 PetscValidType(F,1); 2372 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2373 ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr); 2374 PetscFunctionReturn(0); 2375 } 2376 2377 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST) 2378 { 2379 PetscErrorCode ierr; 2380 Mat spRHS; 2381 2382 PetscFunctionBegin; 2383 ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr); 2384 ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr); 2385 ierr = MatDestroy(&spRHS);CHKERRQ(ierr); 2386 PetscFunctionReturn(0); 2387 } 2388 2389 /*@ 2390 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T 2391 2392 Logically Collective on Mat 2393 2394 Input Parameters: 2395 + F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface 2396 - spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0] 2397 2398 Output Parameter: 2399 . spRHST - requested entries of inverse of A^T 2400 2401 Level: beginner 2402 2403 References: 2404 . 
MUMPS Users' Guide 2405 2406 .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse() 2407 @*/ 2408 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST) 2409 { 2410 PetscErrorCode ierr; 2411 PetscBool flg; 2412 2413 PetscFunctionBegin; 2414 PetscValidType(F,1); 2415 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2416 ierr = PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 2417 if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix"); 2418 2419 ierr = PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));CHKERRQ(ierr); 2420 PetscFunctionReturn(0); 2421 } 2422 2423 /*@ 2424 MatMumpsGetInfo - Get MUMPS parameter INFO() 2425 2426 Logically Collective on Mat 2427 2428 Input Parameters: 2429 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2430 - icntl - index of MUMPS parameter array INFO() 2431 2432 Output Parameter: 2433 . ival - value of MUMPS INFO(icntl) 2434 2435 Level: beginner 2436 2437 References: 2438 . 
MUMPS Users' Guide 2439 2440 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2441 @*/ 2442 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival) 2443 { 2444 PetscErrorCode ierr; 2445 2446 PetscFunctionBegin; 2447 PetscValidType(F,1); 2448 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2449 PetscValidIntPointer(ival,3); 2450 ierr = PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2451 PetscFunctionReturn(0); 2452 } 2453 2454 /*@ 2455 MatMumpsGetInfog - Get MUMPS parameter INFOG() 2456 2457 Logically Collective on Mat 2458 2459 Input Parameters: 2460 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2461 - icntl - index of MUMPS parameter array INFOG() 2462 2463 Output Parameter: 2464 . ival - value of MUMPS INFOG(icntl) 2465 2466 Level: beginner 2467 2468 References: 2469 . 
MUMPS Users' Guide 2470 2471 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2472 @*/ 2473 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival) 2474 { 2475 PetscErrorCode ierr; 2476 2477 PetscFunctionBegin; 2478 PetscValidType(F,1); 2479 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2480 PetscValidIntPointer(ival,3); 2481 ierr = PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2482 PetscFunctionReturn(0); 2483 } 2484 2485 /*@ 2486 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 2487 2488 Logically Collective on Mat 2489 2490 Input Parameters: 2491 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2492 - icntl - index of MUMPS parameter array RINFO() 2493 2494 Output Parameter: 2495 . val - value of MUMPS RINFO(icntl) 2496 2497 Level: beginner 2498 2499 References: 2500 . 
MUMPS Users' Guide 2501 2502 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2503 @*/ 2504 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val) 2505 { 2506 PetscErrorCode ierr; 2507 2508 PetscFunctionBegin; 2509 PetscValidType(F,1); 2510 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2511 PetscValidRealPointer(val,3); 2512 ierr = PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2513 PetscFunctionReturn(0); 2514 } 2515 2516 /*@ 2517 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 2518 2519 Logically Collective on Mat 2520 2521 Input Parameters: 2522 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2523 - icntl - index of MUMPS parameter array RINFOG() 2524 2525 Output Parameter: 2526 . val - value of MUMPS RINFOG(icntl) 2527 2528 Level: beginner 2529 2530 References: 2531 . MUMPS Users' Guide 2532 2533 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2534 @*/ 2535 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val) 2536 { 2537 PetscErrorCode ierr; 2538 2539 PetscFunctionBegin; 2540 PetscValidType(F,1); 2541 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2542 PetscValidRealPointer(val,3); 2543 ierr = PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2544 PetscFunctionReturn(0); 2545 } 2546 2547 /*MC 2548 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 2549 distributed and sequential matrices via the external package MUMPS. 
  Works with MATAIJ and MATSBAIJ matrices

  Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS

  Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below.

  Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver

  Options Database Keys:
+  -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
.  -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning
.  -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host
.  -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4)
.  -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)
.  -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis
.  -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77)
.  -mat_mumps_icntl_10 - ICNTL(10): max num of refinements
.  -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view)
.  -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
.  -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
.  -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space
.  -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement
.  -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
.  -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can allocate per processor
.  -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
.  -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis
.  -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix
.  -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering
.  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
.  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
.  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
.  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
.  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
.  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
.  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
.  -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold
.  -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement
.  -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
.  -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
.  -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
.  -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
-  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
   Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.

  Level: beginner

    Notes:
    MUMPS Cholesky does not handle (complex) Hermitian matrices http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf so using it will error if the matrix is Hermitian.

    When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling
$          KSPGetPC(ksp,&pc);
$          PCFactorGetMatrix(pc,&mat);
$          MatMumpsGetInfo(mat,....);
$          MatMumpsGetInfog(mat,....); etc.
           Or you can run with -ksp_error_if_not_converged and the program will be stopped and the information printed in the error message.

   Two modes to run MUMPS/PETSc with OpenMP

$     Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
$     threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".

$     -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
$     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"

   To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
   (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc
   (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
   libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
   (PETSc will automatically try to utilized a threaded BLAS if --with-openmp is provided).

   If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
   processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
   size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
   are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set
   by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs.
   In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets,
   if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind
   MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half
   cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the
   problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding.
   For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
   examine the mapping result.

   PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts,
   for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc
   calls omp_set_num_threads(m) internally before calling MUMPS.

   References:
+   1. - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
-   2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications."
Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017. 2641 2642 .seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCGetFactor(), PCFactorGetMatrix() 2643 2644 M*/ 2645 2646 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type) 2647 { 2648 PetscFunctionBegin; 2649 *type = MATSOLVERMUMPS; 2650 PetscFunctionReturn(0); 2651 } 2652 2653 /* MatGetFactor for Seq and MPI AIJ matrices */ 2654 static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F) 2655 { 2656 Mat B; 2657 PetscErrorCode ierr; 2658 Mat_MUMPS *mumps; 2659 PetscBool isSeqAIJ; 2660 2661 PetscFunctionBegin; 2662 #if defined(PETSC_USE_COMPLEX) 2663 if (A->hermitian && !A->symmetric && ftype == MAT_FACTOR_CHOLESKY) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported"); 2664 #endif 2665 /* Create the factorization matrix */ 2666 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr); 2667 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2668 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2669 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2670 ierr = MatSetUp(B);CHKERRQ(ierr); 2671 2672 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2673 2674 B->ops->view = MatView_MUMPS; 2675 B->ops->getinfo = MatGetInfo_MUMPS; 2676 2677 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2678 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2679 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2680 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2681 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2682 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2683 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2684 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2685 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2686 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2687 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2688 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2689 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2690 2691 if (ftype == MAT_FACTOR_LU) { 2692 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 2693 B->factortype = MAT_FACTOR_LU; 2694 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 2695 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 2696 mumps->sym = 0; 2697 } else { 2698 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 2699 B->factortype = MAT_FACTOR_CHOLESKY; 2700 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 2701 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 2702 #if defined(PETSC_USE_COMPLEX) 2703 mumps->sym = 2; 2704 #else 2705 if (A->spd_set && A->spd) mumps->sym = 1; 2706 else mumps->sym = 2; 2707 #endif 2708 } 2709 2710 /* set solvertype */ 2711 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2712 ierr = 
PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2713 2714 B->ops->destroy = MatDestroy_MUMPS; 2715 B->data = (void*)mumps; 2716 2717 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2718 2719 *F = B; 2720 PetscFunctionReturn(0); 2721 } 2722 2723 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 2724 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F) 2725 { 2726 Mat B; 2727 PetscErrorCode ierr; 2728 Mat_MUMPS *mumps; 2729 PetscBool isSeqSBAIJ; 2730 2731 PetscFunctionBegin; 2732 #if defined(PETSC_USE_COMPLEX) 2733 if (A->hermitian && !A->symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported"); 2734 #endif 2735 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2736 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2737 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2738 ierr = MatSetUp(B);CHKERRQ(ierr); 2739 2740 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2741 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr); 2742 if (isSeqSBAIJ) { 2743 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 2744 } else { 2745 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 2746 } 2747 2748 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 2749 B->ops->view = MatView_MUMPS; 2750 B->ops->getinfo = MatGetInfo_MUMPS; 2751 2752 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2753 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2754 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2755 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2756 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2757 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2758 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2759 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2760 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2761 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2762 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2763 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2764 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2765 2766 B->factortype = MAT_FACTOR_CHOLESKY; 2767 #if defined(PETSC_USE_COMPLEX) 2768 mumps->sym = 2; 2769 #else 2770 if (A->spd_set && A->spd) mumps->sym = 1; 2771 else mumps->sym = 2; 2772 #endif 2773 2774 /* set solvertype */ 2775 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2776 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2777 2778 B->ops->destroy = MatDestroy_MUMPS; 2779 B->data = (void*)mumps; 2780 2781 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2782 2783 *F = B; 2784 PetscFunctionReturn(0); 2785 } 2786 2787 static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F) 2788 { 2789 Mat B; 2790 PetscErrorCode ierr; 2791 Mat_MUMPS *mumps; 2792 PetscBool isSeqBAIJ; 2793 2794 PetscFunctionBegin; 2795 /* Create the factorization matrix */ 2796 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr); 2797 ierr = 
MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2798 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2799 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2800 ierr = MatSetUp(B);CHKERRQ(ierr); 2801 2802 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2803 if (ftype == MAT_FACTOR_LU) { 2804 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 2805 B->factortype = MAT_FACTOR_LU; 2806 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 2807 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 2808 mumps->sym = 0; 2809 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead\n"); 2810 2811 B->ops->view = MatView_MUMPS; 2812 B->ops->getinfo = MatGetInfo_MUMPS; 2813 2814 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2815 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2816 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2817 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2818 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2819 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2820 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2821 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2822 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2823 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2824 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2825 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2826 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2827 2828 /* set solvertype */ 2829 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2830 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2831 2832 B->ops->destroy = MatDestroy_MUMPS; 2833 B->data = (void*)mumps; 2834 2835 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2836 2837 *F = B; 2838 PetscFunctionReturn(0); 2839 } 2840 2841 /* MatGetFactor for Seq and MPI SELL matrices */ 2842 static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F) 2843 { 2844 Mat B; 2845 PetscErrorCode ierr; 2846 Mat_MUMPS *mumps; 2847 PetscBool isSeqSELL; 2848 2849 PetscFunctionBegin; 2850 /* Create the factorization matrix */ 2851 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);CHKERRQ(ierr); 2852 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2853 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2854 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2855 ierr = MatSetUp(B);CHKERRQ(ierr); 2856 2857 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2858 2859 B->ops->view = MatView_MUMPS; 2860 B->ops->getinfo = MatGetInfo_MUMPS; 2861 2862 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2863 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2864 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2865 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2866 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2867 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2868 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2869 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2870 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2871 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2872 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2873 2874 if (ftype == MAT_FACTOR_LU) { 2875 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 2876 B->factortype = MAT_FACTOR_LU; 2877 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 2878 else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 2879 mumps->sym = 0; 2880 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 2881 2882 /* set solvertype */ 2883 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2884 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2885 2886 B->ops->destroy = MatDestroy_MUMPS; 2887 B->data = (void*)mumps; 2888 2889 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2890 2891 *F = B; 2892 PetscFunctionReturn(0); 2893 } 2894 2895 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 2896 { 2897 PetscErrorCode ierr; 2898 2899 PetscFunctionBegin; 2900 ierr = 
MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2901 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2902 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2903 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2904 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 2905 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2906 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2907 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2908 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2909 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 2910 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);CHKERRQ(ierr); 2911 PetscFunctionReturn(0); 2912 } 2913 2914