1 2 /* 3 Provides an interface to the MUMPS sparse solver 4 */ 5 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 8 #include <../src/mat/impls/sell/mpi/mpisell.h> 9 10 EXTERN_C_BEGIN 11 #if defined(PETSC_USE_COMPLEX) 12 #if defined(PETSC_USE_REAL_SINGLE) 13 #include <cmumps_c.h> 14 #else 15 #include <zmumps_c.h> 16 #endif 17 #else 18 #if defined(PETSC_USE_REAL_SINGLE) 19 #include <smumps_c.h> 20 #else 21 #include <dmumps_c.h> 22 #endif 23 #endif 24 EXTERN_C_END 25 #define JOB_INIT -1 26 #define JOB_FACTSYMBOLIC 1 27 #define JOB_FACTNUMERIC 2 28 #define JOB_SOLVE 3 29 #define JOB_END -2 30 31 /* calls to MUMPS */ 32 #if defined(PETSC_USE_COMPLEX) 33 #if defined(PETSC_USE_REAL_SINGLE) 34 #define MUMPS_c cmumps_c 35 #else 36 #define MUMPS_c zmumps_c 37 #endif 38 #else 39 #if defined(PETSC_USE_REAL_SINGLE) 40 #define MUMPS_c smumps_c 41 #else 42 #define MUMPS_c dmumps_c 43 #endif 44 #endif 45 46 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */ 47 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 48 #define PetscMUMPS_c(mumps) \ 49 do { \ 50 if (mumps->use_petsc_omp_support) { \ 51 if (mumps->is_omp_master) { \ 52 ierr = PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl);CHKERRQ(ierr); \ 53 MUMPS_c(&mumps->id); \ 54 ierr = PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl);CHKERRQ(ierr); \ 55 } \ 56 ierr = PetscOmpCtrlBarrier(mumps->omp_ctrl);CHKERRQ(ierr); \ 57 } else { \ 58 MUMPS_c(&mumps->id); \ 59 } \ 60 } while(0) 61 #else 62 #define PetscMUMPS_c(mumps) \ 63 do { MUMPS_c(&mumps->id); } while (0) 64 #endif 65 66 /* declare MumpsScalar */ 67 #if defined(PETSC_USE_COMPLEX) 68 #if defined(PETSC_USE_REAL_SINGLE) 69 #define MumpsScalar mumps_complex 70 #else 71 #define MumpsScalar mumps_double_complex 72 #endif 73 #else 74 #define MumpsScalar PetscScalar 75 #endif 76 77 /* macros s.t. indices match MUMPS documentation */ 78 #define ICNTL(I) icntl[(I)-1] 79 #define CNTL(I) cntl[(I)-1] 80 #define INFOG(I) infog[(I)-1] 81 #define INFO(I) info[(I)-1] 82 #define RINFOG(I) rinfog[(I)-1] 83 #define RINFO(I) rinfo[(I)-1] 84 85 typedef struct { 86 #if defined(PETSC_USE_COMPLEX) 87 #if defined(PETSC_USE_REAL_SINGLE) 88 CMUMPS_STRUC_C id; 89 #else 90 ZMUMPS_STRUC_C id; 91 #endif 92 #else 93 #if defined(PETSC_USE_REAL_SINGLE) 94 SMUMPS_STRUC_C id; 95 #else 96 DMUMPS_STRUC_C id; 97 #endif 98 #endif 99 100 MatStructure matstruc; 101 PetscMPIInt myid,petsc_size; 102 PetscInt *irn,*jcn,nz,sym; 103 PetscScalar *val; 104 MPI_Comm mumps_comm; 105 PetscInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 106 VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 107 Vec b_seq,x_seq; 108 PetscInt ninfo,*info; /* display INFO */ 109 PetscInt sizeredrhs; 110 PetscScalar *schur_sol; 111 PetscInt schur_sizesol; 112 113 PetscBool use_petsc_omp_support; 114 PetscOmpCtrl omp_ctrl; /* an OpenMP controler that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 115 MPI_Comm petsc_comm,omp_comm; /* petsc_comm is petsc matrix's comm */ 116 PetscMPIInt mpinz; /* on master rank, nz = sum(mpinz) over omp_comm; on other ranks, mpinz = nz*/ 117 PetscMPIInt omp_comm_size; 118 PetscBool is_omp_master; /* is this rank the master of omp_comm */ 119 PetscMPIInt *recvcount,*displs; 120 121 PetscErrorCode (*ConvertToTriples)(Mat, int, MatReuse, int*, int**, int**, PetscScalar**); 122 } Mat_MUMPS; 123 124 extern PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*); 125 126 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps) 127 { 128 PetscErrorCode ierr; 129 130 PetscFunctionBegin; 131 ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr); 132 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 133 ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr); 134 mumps->id.size_schur = 0; 135 mumps->id.schur_lld = 0; 136 mumps->id.ICNTL(19) = 0; 137 PetscFunctionReturn(0); 138 } 139 140 /* solve with rhs in mumps->id.redrhs and return in the same location */ 141 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 142 { 143 Mat_MUMPS *mumps=(Mat_MUMPS*)F->data; 144 Mat S,B,X; 145 MatFactorSchurStatus schurstatus; 146 PetscInt sizesol; 147 PetscErrorCode ierr; 148 149 PetscFunctionBegin; 150 ierr = MatFactorFactorizeSchurComplement(F);CHKERRQ(ierr); 151 ierr = MatFactorGetSchurComplement(F,&S,&schurstatus);CHKERRQ(ierr); 152 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);CHKERRQ(ierr); 153 ierr = MatSetType(B,((PetscObject)S)->type_name);CHKERRQ(ierr); 154 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 155 ierr = MatPinToCPU(B,S->pinnedtocpu);CHKERRQ(ierr); 156 #endif 157 switch (schurstatus) { 158 case MAT_FACTOR_SCHUR_FACTORED: 159 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);CHKERRQ(ierr); 160 ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr); 161 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 162 ierr = MatPinToCPU(X,S->pinnedtocpu);CHKERRQ(ierr); 163 #endif 164 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 165 ierr = MatMatSolveTranspose(S,B,X);CHKERRQ(ierr); 166 } else { 167 ierr = MatMatSolve(S,B,X);CHKERRQ(ierr); 168 } 169 break; 170 case MAT_FACTOR_SCHUR_INVERTED: 171 sizesol = mumps->id.nrhs*mumps->id.size_schur; 172 if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 173 ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr); 174 ierr = PetscMalloc1(sizesol,&mumps->schur_sol);CHKERRQ(ierr); 175 mumps->schur_sizesol = sizesol; 176 } 177 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);CHKERRQ(ierr); 178 ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr); 179 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 180 ierr = MatPinToCPU(X,S->pinnedtocpu);CHKERRQ(ierr); 181 #endif 182 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 183 ierr = MatTransposeMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr); 184 } else { 185 ierr = MatMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr); 186 } 187 ierr = MatCopy(X,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 188 break; 189 default: 190 SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %D",F->schur_status); 191 break; 192 } 193 ierr = MatFactorRestoreSchurComplement(F,&S,schurstatus);CHKERRQ(ierr); 194 ierr = MatDestroy(&B);CHKERRQ(ierr); 195 ierr = MatDestroy(&X);CHKERRQ(ierr); 196 PetscFunctionReturn(0); 197 } 198 199 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 200 { 201 Mat_MUMPS *mumps=(Mat_MUMPS*)F->data; 202 PetscErrorCode ierr; 203 204 PetscFunctionBegin; 205 if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 206 PetscFunctionReturn(0); 207 } 208 if (!expansion) { /* prepare for the condensation step */ 209 PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur; 210 /* allocate MUMPS internal array to store reduced right-hand sides */ 211 if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 212 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 213 mumps->id.lredrhs = mumps->id.size_schur; 214 ierr = PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);CHKERRQ(ierr); 215 mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs; 216 } 217 mumps->id.ICNTL(26) = 1; /* condensation phase */ 218 } else { /* prepare for the expansion step */ 219 /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */ 220 ierr = MatMumpsSolveSchur_Private(F);CHKERRQ(ierr); 221 mumps->id.ICNTL(26) = 2; /* expansion phase */ 222 PetscMUMPS_c(mumps); 223 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 224 /* restore defaults */ 225 mumps->id.ICNTL(26) = -1; 226 /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 227 if (mumps->id.nrhs > 1) { 228 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 229 mumps->id.lredrhs = 0; 230 mumps->sizeredrhs = 0; 231 } 232 } 233 PetscFunctionReturn(0); 234 } 235 236 /* 237 MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz] 238 239 input: 240 A - matrix in aij,baij or sbaij format 241 shift - 0: C style output triple; 1: Fortran style output triple. 242 reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 243 MAT_REUSE_MATRIX: only the values in v array are updated 244 output: 245 nnz - dim of r, c, and v (number of local nonzero entries of A) 246 r, c, v - row and col index, matrix values (matrix triples) 247 248 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 249 freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 250 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 251 252 */ 253 254 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 255 { 256 const PetscScalar *av; 257 const PetscInt *ai,*aj,*ajj,M=A->rmap->n; 258 PetscInt nz,rnz,i,j; 259 PetscErrorCode ierr; 260 PetscInt *row,*col; 261 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 262 263 PetscFunctionBegin; 264 ierr = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr); 265 *v = (PetscScalar*)av; 266 if (reuse == MAT_INITIAL_MATRIX) { 267 nz = aa->nz; 268 ai = aa->i; 269 aj = aa->j; 270 *nnz = nz; 271 ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr); 272 col = row + nz; 273 274 nz = 0; 275 for (i=0; i<M; i++) { 276 rnz = ai[i+1] - ai[i]; 277 ajj = aj + ai[i]; 278 for (j=0; j<rnz; j++) { 279 row[nz] = i+shift; col[nz++] = ajj[j] + shift; 280 } 281 } 282 *r = row; *c = col; 283 } 284 ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr); 285 PetscFunctionReturn(0); 286 } 287 288 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 289 { 290 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 291 PetscInt *ptr; 292 293 PetscFunctionBegin; 294 *v = a->val; 295 if (reuse == MAT_INITIAL_MATRIX) { 296 PetscInt nz,i,j,row; 297 PetscErrorCode ierr; 298 299 nz = a->sliidx[a->totalslices]; 300 *nnz = nz; 301 ierr = PetscMalloc1(2*nz, &ptr);CHKERRQ(ierr); 302 *r = ptr; 303 *c = ptr + nz; 304 305 for (i=0; i<a->totalslices; i++) { 306 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 307 *ptr++ = 8*i + row + shift; 308 } 309 } 310 for (i=0;i<nz;i++) *ptr++ = a->colidx[i] + shift; 311 } 312 PetscFunctionReturn(0); 313 } 314 315 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 316 { 317 Mat_SeqBAIJ *aa=(Mat_SeqBAIJ*)A->data; 318 const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2; 319 PetscInt bs,M,nz,idx=0,rnz,i,j,k,m; 320 PetscErrorCode ierr; 321 PetscInt *row,*col; 322 323 PetscFunctionBegin; 324 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 325 M = A->rmap->N/bs; 326 *v = aa->a; 327 if (reuse == MAT_INITIAL_MATRIX) { 328 ai = aa->i; aj = aa->j; 329 nz = bs2*aa->nz; 330 *nnz = nz; 331 ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr); 332 col = row + nz; 333 334 for (i=0; i<M; i++) { 335 ajj = aj + ai[i]; 336 rnz = ai[i+1] - ai[i]; 337 for (k=0; k<rnz; k++) { 338 for (j=0; j<bs; j++) { 339 for (m=0; m<bs; m++) { 340 row[idx] = i*bs + m + shift; 341 col[idx++] = bs*(ajj[k]) + j + shift; 342 } 343 } 344 } 345 } 346 *r = row; *c = col; 347 } 348 PetscFunctionReturn(0); 349 } 350 351 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r,int **c,PetscScalar **v) 352 { 353 const PetscInt *ai, *aj,*ajj; 354 PetscInt nz,rnz,i,j,k,m,bs; 355 PetscErrorCode ierr; 356 PetscInt *row,*col; 357 PetscScalar *val; 358 Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)A->data; 359 const PetscInt bs2=aa->bs2,mbs=aa->mbs; 360 #if defined(PETSC_USE_COMPLEX) 361 PetscBool hermitian; 362 #endif 363 364 PetscFunctionBegin; 365 #if defined(PETSC_USE_COMPLEX) 366 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 367 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 368 #endif 369 ai = aa->i; 370 aj = aa->j; 371 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 372 if (reuse == MAT_INITIAL_MATRIX) { 373 nz = aa->nz; 374 ierr = PetscMalloc((2*bs2*nz*sizeof(PetscInt)+(bs>1?bs2*nz*sizeof(PetscScalar):0)), &row);CHKERRQ(ierr); 375 col = row + bs2*nz; 376 if (bs>1) val = (PetscScalar*)(col + bs2*nz); 377 else val = aa->a; 378 379 *r = row; *c = col; *v = val; 380 } else { 381 if (bs == 1) *v = aa->a; 382 row = *r; col = *c; val = *v; 383 } 384 385 nz = 0; 386 if (bs>1) { 387 for (i=0; i<mbs; i++) { 388 rnz = ai[i+1] - ai[i]; 389 ajj = aj + ai[i]; 390 for (j=0; j<rnz; j++) { 391 for (k=0; k<bs; k++) { 392 for (m=0; m<bs; m++) { 393 if (ajj[j]>i || k>=m) { 394 if (reuse == MAT_INITIAL_MATRIX) { 395 row[nz] = i*bs + m + shift; 396 col[nz] = ajj[j]*bs + k + shift; 397 } 398 val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs]; 399 } 400 } 401 } 402 } 403 } 404 } else if (reuse == MAT_INITIAL_MATRIX) { 405 for (i=0; i<mbs; i++) { 406 rnz = ai[i+1] - ai[i]; 407 ajj = aj + ai[i]; 408 for (j=0; j<rnz; j++) { 409 row[nz] = i+shift; col[nz++] = ajj[j] + shift; 410 } 411 } 412 if (nz != aa->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %D != %D",nz,aa->nz); 413 } 414 if (reuse == MAT_INITIAL_MATRIX) *nnz = nz; 415 PetscFunctionReturn(0); 416 } 417 418 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 419 { 420 const PetscInt *ai,*aj,*ajj,*adiag,M=A->rmap->n; 421 PetscInt nz,rnz,i,j; 422 const PetscScalar *av,*v1; 423 PetscScalar *val; 424 PetscErrorCode ierr; 425 PetscInt *row,*col; 426 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 427 PetscBool missing; 428 #if defined(PETSC_USE_COMPLEX) 429 PetscBool hermitian; 430 #endif 431 432 PetscFunctionBegin; 433 #if defined(PETSC_USE_COMPLEX) 434 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 435 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 436 #endif 437 ierr = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr); 438 ai = aa->i; aj = aa->j; 439 adiag = aa->diag; 440 ierr = MatMissingDiagonal_SeqAIJ(A,&missing,&i);CHKERRQ(ierr); 441 if (reuse == MAT_INITIAL_MATRIX) { 442 /* count nz in the upper triangular part of A */ 443 nz = 0; 444 if (missing) { 445 for (i=0; i<M; i++) { 446 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 447 for (j=ai[i];j<ai[i+1];j++) { 448 if (aj[j] < i) continue; 449 nz++; 450 } 451 } else { 452 nz += ai[i+1] - adiag[i]; 453 } 454 } 455 } else { 456 for (i=0; i<M; i++) nz += ai[i+1] - adiag[i]; 457 } 458 *nnz = nz; 459 460 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 461 col = row + nz; 462 val = (PetscScalar*)(col + nz); 463 464 nz = 0; 465 if (missing) { 466 for (i=0; i<M; i++) { 467 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 468 for (j=ai[i];j<ai[i+1];j++) { 469 if (aj[j] < i) continue; 470 row[nz] = i+shift; 471 col[nz] = aj[j]+shift; 472 val[nz] = av[j]; 473 nz++; 474 } 475 } else { 476 rnz = ai[i+1] - adiag[i]; 477 ajj = aj + adiag[i]; 478 v1 = av + adiag[i]; 479 for (j=0; j<rnz; j++) { 480 row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j]; 481 } 482 } 483 } 484 } else { 485 for (i=0; i<M; i++) { 486 rnz = ai[i+1] - adiag[i]; 487 ajj = aj + adiag[i]; 488 v1 = av + adiag[i]; 489 for (j=0; j<rnz; j++) { 490 row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j]; 491 } 492 } 493 } 494 *r = row; *c = col; *v = val; 495 } else { 496 nz = 0; val = *v; 497 if (missing) { 498 for (i=0; i <M; i++) { 499 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 500 for (j=ai[i];j<ai[i+1];j++) { 501 if (aj[j] < i) continue; 502 val[nz++] = av[j]; 503 } 504 } else { 505 rnz = ai[i+1] - adiag[i]; 506 v1 = av + adiag[i]; 507 for (j=0; j<rnz; j++) { 508 val[nz++] = v1[j]; 509 } 510 } 511 } 512 } else { 513 for (i=0; i <M; i++) { 514 rnz = ai[i+1] - adiag[i]; 515 v1 = av + adiag[i]; 516 for (j=0; j<rnz; j++) { 517 val[nz++] = v1[j]; 518 } 519 } 520 } 521 } 522 ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr); 523 PetscFunctionReturn(0); 524 } 525 526 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r,int **c, PetscScalar **v) 527 { 528 const PetscInt *ai,*aj,*bi,*bj,*garray,*ajj,*bjj; 529 PetscErrorCode ierr; 530 PetscInt rstart,nz,bs,i,j,k,m,jj,irow,countA,countB; 531 PetscInt *row,*col; 532 const PetscScalar *av,*bv,*v1,*v2; 533 PetscScalar *val; 534 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data; 535 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)(mat->A)->data; 536 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 537 const PetscInt bs2=aa->bs2,mbs=aa->mbs; 538 #if defined(PETSC_USE_COMPLEX) 539 PetscBool hermitian; 540 #endif 541 542 PetscFunctionBegin; 543 #if defined(PETSC_USE_COMPLEX) 544 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 545 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 546 #endif 547 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 548 rstart = A->rmap->rstart; 549 ai = aa->i; 550 aj = aa->j; 551 bi = bb->i; 552 bj = bb->j; 553 av = aa->a; 554 bv = bb->a; 555 556 garray = mat->garray; 557 558 if (reuse == MAT_INITIAL_MATRIX) { 559 nz = aa->nz + bb->nz; 560 ierr = PetscMalloc((2*bs2*nz*sizeof(PetscInt)+bs2*nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 561 col = row + bs2*nz; 562 val = (PetscScalar*)(col + bs2*nz); 563 564 *r = row; *c = col; *v = val; 565 } else { 566 row = *r; col = *c; val = *v; 567 } 568 569 jj = 0; irow = rstart; 570 for (i=0; i<mbs; i++) { 571 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 572 countA = ai[i+1] - ai[i]; 573 countB = bi[i+1] - bi[i]; 574 bjj = bj + bi[i]; 575 v1 = av + ai[i]*bs2; 576 v2 = bv + bi[i]*bs2; 577 578 if (bs>1) { 579 /* A-part */ 580 for (j=0; j<countA; j++) { 581 for (k=0; k<bs; k++) { 582 for (m=0; m<bs; m++) { 583 if (rstart + ajj[j]*bs>irow || k>=m) { 584 if (reuse == MAT_INITIAL_MATRIX) { 585 row[jj] = irow + m + shift; col[jj] = rstart + ajj[j]*bs + k + shift; 586 } 587 val[jj++] = v1[j*bs2 + m + k*bs]; 588 } 589 } 590 } 591 } 592 593 /* B-part */ 594 for (j=0; j < countB; j++) { 595 for (k=0; k<bs; k++) { 596 for (m=0; m<bs; m++) { 597 if (reuse == MAT_INITIAL_MATRIX) { 598 row[jj] = irow + m + shift; col[jj] = garray[bjj[j]]*bs + k + shift; 599 } 600 val[jj++] = v2[j*bs2 + m + k*bs]; 601 } 602 } 603 } 604 } else { 605 /* A-part */ 606 for (j=0; j<countA; j++) { 607 if (reuse == MAT_INITIAL_MATRIX) { 608 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 609 } 610 val[jj++] = v1[j]; 611 } 612 613 /* B-part */ 614 for (j=0; j < countB; j++) { 615 if (reuse == MAT_INITIAL_MATRIX) { 616 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 617 } 618 val[jj++] = v2[j]; 619 } 620 } 621 irow+=bs; 622 } 623 *nnz = jj; 624 PetscFunctionReturn(0); 625 } 626 627 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 628 { 629 const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 630 PetscErrorCode ierr; 631 PetscInt rstart,nz,i,j,jj,irow,countA,countB; 632 PetscInt *row,*col; 633 const PetscScalar *av, *bv,*v1,*v2; 634 PetscScalar *val; 635 Mat Ad,Ao; 636 Mat_SeqAIJ *aa; 637 Mat_SeqAIJ *bb; 638 639 PetscFunctionBegin; 640 ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr); 641 ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr); 642 ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr); 643 644 aa = (Mat_SeqAIJ*)(Ad)->data; 645 bb = (Mat_SeqAIJ*)(Ao)->data; 646 ai = aa->i; 647 aj = aa->j; 648 bi = bb->i; 649 bj = bb->j; 650 651 rstart = A->rmap->rstart; 652 653 if (reuse == MAT_INITIAL_MATRIX) { 654 nz = aa->nz + bb->nz; 655 *nnz = nz; 656 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 657 col = row + nz; 658 val = (PetscScalar*)(col + nz); 659 660 *r = row; *c = col; *v = val; 661 } else { 662 row = *r; col = *c; val = *v; 663 } 664 665 jj = 0; irow = rstart; 666 for (i=0; i<m; i++) { 667 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 668 countA = ai[i+1] - ai[i]; 669 countB = bi[i+1] - bi[i]; 670 bjj = bj + bi[i]; 671 v1 = av + ai[i]; 672 v2 = bv + bi[i]; 673 674 /* A-part */ 675 for (j=0; j<countA; j++) { 676 if (reuse == MAT_INITIAL_MATRIX) { 677 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 678 } 679 val[jj++] = v1[j]; 680 } 681 682 /* B-part */ 683 for (j=0; j < countB; j++) { 684 if (reuse == MAT_INITIAL_MATRIX) { 685 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 686 } 687 val[jj++] = v2[j]; 688 } 689 irow++; 690 } 691 ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr); 692 ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr); 693 PetscFunctionReturn(0); 694 } 695 696 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 697 { 698 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)A->data; 699 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ*)(mat->A)->data; 700 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 701 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj; 702 const PetscInt *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart; 703 const PetscInt bs2=mat->bs2; 704 PetscErrorCode ierr; 705 PetscInt bs,nz,i,j,k,n,jj,irow,countA,countB,idx; 706 PetscInt *row,*col; 707 const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2; 708 PetscScalar *val; 709 710 PetscFunctionBegin; 711 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 712 if (reuse == MAT_INITIAL_MATRIX) { 713 nz = bs2*(aa->nz + bb->nz); 714 *nnz = nz; 715 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 716 col = row + nz; 717 val = (PetscScalar*)(col + nz); 718 719 *r = row; *c = col; *v = val; 720 } else { 721 row = *r; col = *c; val = *v; 722 } 723 724 jj = 0; irow = rstart; 725 for (i=0; i<mbs; i++) { 726 countA = ai[i+1] - ai[i]; 727 countB = bi[i+1] - bi[i]; 728 ajj = aj + ai[i]; 729 bjj = bj + bi[i]; 730 v1 = av + bs2*ai[i]; 731 v2 = bv + bs2*bi[i]; 732 733 idx = 0; 734 /* A-part */ 735 for (k=0; k<countA; k++) { 736 for (j=0; j<bs; j++) { 737 for (n=0; n<bs; n++) { 738 if (reuse == MAT_INITIAL_MATRIX) { 739 row[jj] = irow + n + shift; 740 col[jj] = rstart + bs*ajj[k] + j + shift; 741 } 742 val[jj++] = v1[idx++]; 743 } 744 } 745 } 746 747 idx = 0; 748 /* B-part */ 749 for (k=0; k<countB; k++) { 750 for (j=0; j<bs; j++) { 751 for (n=0; n<bs; n++) { 752 if (reuse == MAT_INITIAL_MATRIX) { 753 row[jj] = irow + n + shift; 754 col[jj] = bs*garray[bjj[k]] + j + shift; 755 } 756 val[jj++] = v2[idx++]; 757 } 758 } 759 } 760 irow += bs; 761 } 762 PetscFunctionReturn(0); 763 } 764 765 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 766 { 767 const PetscInt *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 768 PetscErrorCode ierr; 769 PetscInt rstart,nz,nza,nzb,i,j,jj,irow,countA,countB; 770 PetscInt *row,*col; 771 const PetscScalar *av, *bv,*v1,*v2; 772 PetscScalar *val; 773 Mat Ad,Ao; 774 Mat_SeqAIJ *aa; 775 Mat_SeqAIJ *bb; 776 #if defined(PETSC_USE_COMPLEX) 777 PetscBool hermitian; 778 #endif 779 780 PetscFunctionBegin; 781 #if defined(PETSC_USE_COMPLEX) 782 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 783 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 784 #endif 785 ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr); 786 ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr); 787 ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr); 788 789 aa = (Mat_SeqAIJ*)(Ad)->data; 790 bb = (Mat_SeqAIJ*)(Ao)->data; 791 ai = aa->i; 792 aj = aa->j; 793 adiag = aa->diag; 794 bi = bb->i; 795 bj = bb->j; 796 797 rstart = A->rmap->rstart; 798 799 if (reuse == MAT_INITIAL_MATRIX) { 800 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 801 nzb = 0; /* num of upper triangular entries in mat->B */ 802 for (i=0; i<m; i++) { 803 nza += (ai[i+1] - adiag[i]); 804 countB = bi[i+1] - bi[i]; 805 bjj = bj + bi[i]; 806 for (j=0; j<countB; j++) { 807 if (garray[bjj[j]] > rstart) nzb++; 808 } 809 } 810 811 nz = nza + nzb; /* total nz of upper triangular part of mat */ 812 *nnz = nz; 813 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 814 col = row + nz; 815 val = (PetscScalar*)(col + nz); 816 817 *r = row; *c = col; *v = val; 818 } else { 819 row = *r; col = *c; val = *v; 820 } 821 822 jj = 0; irow = rstart; 823 for (i=0; i<m; i++) { 824 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 825 v1 = av + adiag[i]; 826 countA = ai[i+1] - adiag[i]; 827 countB = bi[i+1] - bi[i]; 828 bjj = bj + bi[i]; 829 v2 = bv + bi[i]; 830 831 /* A-part */ 832 for (j=0; j<countA; j++) { 833 if (reuse == MAT_INITIAL_MATRIX) { 834 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 835 } 836 val[jj++] = v1[j]; 837 } 838 839 /* B-part */ 840 for (j=0; j < countB; j++) { 841 if (garray[bjj[j]] > rstart) { 842 if (reuse == MAT_INITIAL_MATRIX) { 843 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 844 } 845 val[jj++] = v2[j]; 846 } 847 } 848 irow++; 849 } 850 ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr); 851 ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr); 852 PetscFunctionReturn(0); 853 } 854 855 PetscErrorCode MatDestroy_MUMPS(Mat A) 856 { 857 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 858 PetscErrorCode ierr; 859 860 PetscFunctionBegin; 861 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 862 ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr); 863 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 864 ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr); 865 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 866 ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr); 867 ierr = PetscFree(mumps->irn);CHKERRQ(ierr); 868 ierr = PetscFree(mumps->info);CHKERRQ(ierr); 869 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 870 mumps->id.job = JOB_END; 871 PetscMUMPS_c(mumps); 872 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 873 if (mumps->use_petsc_omp_support) { ierr = PetscOmpCtrlDestroy(&mumps->omp_ctrl);CHKERRQ(ierr); } 874 #endif 875 ierr = PetscFree2(mumps->recvcount,mumps->displs);CHKERRQ(ierr); 876 ierr = PetscFree(A->data);CHKERRQ(ierr); 877 878 /* clear composed functions */ 879 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr); 880 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr); 881 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr); 882 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr); 883 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);CHKERRQ(ierr); 884 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr); 885 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);CHKERRQ(ierr); 886 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);CHKERRQ(ierr); 887 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);CHKERRQ(ierr); 888 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);CHKERRQ(ierr); 889 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);CHKERRQ(ierr); 890 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);CHKERRQ(ierr); 891 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);CHKERRQ(ierr); 892 PetscFunctionReturn(0); 893 } 894 895 PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x) 896 { 897 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 898 PetscScalar *array; 899 Vec b_seq; 900 IS is_iden,is_petsc; 901 PetscErrorCode ierr; 902 PetscInt i; 903 PetscBool second_solve = PETSC_FALSE; 904 static PetscBool cite1 = PETSC_FALSE,cite2 = PETSC_FALSE; 905 906 PetscFunctionBegin; 907 ierr = PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1);CHKERRQ(ierr); 908 ierr = PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",&cite2);CHKERRQ(ierr); 909 910 if (A->factorerrortype) { 911 ierr = PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 912 ierr = VecSetInf(x);CHKERRQ(ierr); 913 PetscFunctionReturn(0); 914 } 915 916 mumps->id.ICNTL(20) = 0; /* dense RHS */ 917 mumps->id.nrhs = 1; 918 b_seq = mumps->b_seq; 919 if (mumps->petsc_size > 1) { 920 /* MUMPS only supports centralized rhs. Scatter b into a seqential rhs vector */ 921 ierr = VecScatterBegin(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 922 ierr = VecScatterEnd(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 923 if (!mumps->myid) {ierr = VecGetArray(b_seq,&array);CHKERRQ(ierr);} 924 } else { /* petsc_size == 1 */ 925 ierr = VecCopy(b,x);CHKERRQ(ierr); 926 ierr = VecGetArray(x,&array);CHKERRQ(ierr); 927 } 928 if (!mumps->myid) { /* define rhs on the host */ 929 mumps->id.nrhs = 1; 930 mumps->id.rhs = (MumpsScalar*)array; 931 } 932 933 /* 934 handle condensation step of Schur complement (if any) 935 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 936 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 937 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 938 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 939 */ 940 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 941 if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n"); 942 second_solve = PETSC_TRUE; 943 ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr); 944 } 945 /* solve phase */ 946 /*-------------*/ 947 mumps->id.job = JOB_SOLVE; 948 PetscMUMPS_c(mumps); 949 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 950 951 /* handle expansion step of Schur complement (if any) */ 952 if (second_solve) { 953 ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr); 954 } 955 956 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 957 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 958 /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 959 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 960 } 961 if (!mumps->scat_sol) { /* create scatter scat_sol */ 962 ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */ 963 for (i=0; i<mumps->id.lsol_loc; i++) { 964 mumps->id.isol_loc[i] -= 1; /* change Fortran style to C style */ 965 } 966 ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,mumps->id.isol_loc,PETSC_COPY_VALUES,&is_petsc);CHKERRQ(ierr); /* to */ 967 ierr = VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr); 968 ierr = ISDestroy(&is_iden);CHKERRQ(ierr); 969 ierr = ISDestroy(&is_petsc);CHKERRQ(ierr); 970 971 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 972 } 973 974 ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 975 ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 976 } 977 978 if (mumps->petsc_size > 1) {if (!mumps->myid) {ierr = VecRestoreArray(b_seq,&array);CHKERRQ(ierr);}} 979 else {ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);} 980 981 ierr = PetscLogFlops(2.0*mumps->id.RINFO(3));CHKERRQ(ierr); 982 PetscFunctionReturn(0); 983 } 984 985 PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x) 986 { 987 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 988 PetscErrorCode ierr; 989 990 PetscFunctionBegin; 991 mumps->id.ICNTL(9) = 0; 992 ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr); 993 mumps->id.ICNTL(9) = 1; 994 PetscFunctionReturn(0); 995 } 996 997 PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X) 998 { 999 PetscErrorCode ierr; 1000 Mat Bt = NULL; 1001 PetscBool flg, flgT; 1002 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1003 PetscInt i,nrhs,M; 1004 PetscScalar *array; 1005 const PetscScalar *rbray; 1006 PetscInt lsol_loc,nlsol_loc,*isol_loc,*idxx,*isol_loc_save,iidx = 0; 1007 PetscScalar *bray,*sol_loc,*sol_loc_save; 1008 IS is_to,is_from; 1009 PetscInt k,proc,j,m,myrstart; 1010 const PetscInt *rstart; 1011 Vec v_mpi,b_seq,msol_loc; 1012 VecScatter scat_rhs,scat_sol; 1013 PetscScalar *aa; 1014 PetscInt spnr,*ia,*ja; 1015 Mat_MPIAIJ *b = NULL; 1016 1017 PetscFunctionBegin; 1018 ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 1019 if (!flg) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix"); 1020 1021 ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 1022 if (flg) { /* dense B */ 1023 if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution"); 1024 mumps->id.ICNTL(20)= 0; /* dense RHS */ 1025 } else { /* sparse B */ 1026 ierr = PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);CHKERRQ(ierr); 1027 if (flgT) { /* input B is transpose of actural RHS matrix, 1028 because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */ 1029 ierr = MatTransposeGetMat(B,&Bt);CHKERRQ(ierr); 1030 } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix"); 1031 mumps->id.ICNTL(20)= 1; /* sparse RHS */ 1032 } 1033 1034 ierr = MatGetSize(B,&M,&nrhs);CHKERRQ(ierr); 1035 mumps->id.nrhs = nrhs; 1036 mumps->id.lrhs = M; 1037 mumps->id.rhs = NULL; 1038 1039 if (mumps->petsc_size == 1) { 1040 PetscScalar *aa; 1041 PetscInt spnr,*ia,*ja; 1042 PetscBool second_solve = PETSC_FALSE; 1043 1044 ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr); 1045 mumps->id.rhs = (MumpsScalar*)array; 1046 1047 if (!Bt) { /* dense B */ 1048 /* copy B to X */ 1049 ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr); 1050 ierr = PetscArraycpy(array,rbray,M*nrhs);CHKERRQ(ierr); 1051 ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr); 1052 } else { /* sparse B */ 1053 ierr = MatSeqAIJGetArray(Bt,&aa);CHKERRQ(ierr); 1054 ierr = MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1055 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 1056 /* mumps requires ia and ja start at 1! */ 1057 mumps->id.irhs_ptr = ia; 1058 mumps->id.irhs_sparse = ja; 1059 mumps->id.nz_rhs = ia[spnr] - 1; 1060 mumps->id.rhs_sparse = (MumpsScalar*)aa; 1061 } 1062 /* handle condensation step of Schur complement (if any) */ 1063 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1064 second_solve = PETSC_TRUE; 1065 ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr); 1066 } 1067 /* solve phase */ 1068 /*-------------*/ 1069 mumps->id.job = JOB_SOLVE; 1070 PetscMUMPS_c(mumps); 1071 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1072 1073 /* handle expansion step of Schur complement (if any) */ 1074 if (second_solve) { 1075 ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr); 1076 } 1077 if (Bt) { /* sparse B */ 1078 ierr = MatSeqAIJRestoreArray(Bt,&aa);CHKERRQ(ierr); 1079 ierr = MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1080 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 1081 } 1082 ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr); 1083 PetscFunctionReturn(0); 1084 } 1085 1086 /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/ 1087 if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n"); 1088 1089 /* create msol_loc to hold mumps local solution */ 1090 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1091 sol_loc_save = (PetscScalar*)mumps->id.sol_loc; 1092 1093 lsol_loc = mumps->id.lsol_loc; 1094 nlsol_loc = nrhs*lsol_loc; /* length of sol_loc */ 1095 ierr = PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);CHKERRQ(ierr); 1096 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1097 mumps->id.isol_loc = isol_loc; 1098 1099 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);CHKERRQ(ierr); 1100 1101 if (!Bt) { /* dense B */ 1102 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1103 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1104 0, re-arrange B into desired order, which is a local operation. 1105 */ 1106 1107 /* scatter v_mpi to b_seq because MUMPS only supports centralized rhs */ 1108 /* wrap dense rhs matrix B into a vector v_mpi */ 1109 ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr); 1110 ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr); 1111 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr); 1112 ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr); 1113 1114 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */ 1115 if (!mumps->myid) { 1116 PetscInt *idx; 1117 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1118 ierr = PetscMalloc1(nrhs*M,&idx);CHKERRQ(ierr); 1119 ierr = MatGetOwnershipRanges(B,&rstart);CHKERRQ(ierr); 1120 k = 0; 1121 for (proc=0; proc<mumps->petsc_size; proc++){ 1122 for (j=0; j<nrhs; j++){ 1123 for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i; 1124 } 1125 } 1126 1127 ierr = VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);CHKERRQ(ierr); 1128 ierr = ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr); 1129 ierr = ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);CHKERRQ(ierr); 1130 } else { 1131 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);CHKERRQ(ierr); 1132 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);CHKERRQ(ierr); 1133 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);CHKERRQ(ierr); 1134 } 1135 ierr = VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);CHKERRQ(ierr); 1136 ierr = VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1137 ierr = ISDestroy(&is_to);CHKERRQ(ierr); 1138 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1139 ierr = VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1140 1141 if (!mumps->myid) { /* define rhs on the host */ 1142 ierr = VecGetArray(b_seq,&bray);CHKERRQ(ierr); 1143 mumps->id.rhs = (MumpsScalar*)bray; 1144 ierr = VecRestoreArray(b_seq,&bray);CHKERRQ(ierr); 1145 } 1146 1147 } else { /* sparse B */ 1148 b = (Mat_MPIAIJ*)Bt->data; 1149 1150 /* wrap dense X into a vector v_mpi */ 1151 ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr); 1152 ierr = MatDenseGetArray(X,&bray);CHKERRQ(ierr); 1153 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr); 1154 ierr = MatDenseRestoreArray(X,&bray);CHKERRQ(ierr); 1155 1156 if (!mumps->myid) { 1157 ierr = MatSeqAIJGetArray(b->A,&aa);CHKERRQ(ierr); 1158 ierr = MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1159 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 1160 /* mumps requires ia and ja start at 1! */ 1161 mumps->id.irhs_ptr = ia; 1162 mumps->id.irhs_sparse = ja; 1163 mumps->id.nz_rhs = ia[spnr] - 1; 1164 mumps->id.rhs_sparse = (MumpsScalar*)aa; 1165 } else { 1166 mumps->id.irhs_ptr = NULL; 1167 mumps->id.irhs_sparse = NULL; 1168 mumps->id.nz_rhs = 0; 1169 mumps->id.rhs_sparse = NULL; 1170 } 1171 } 1172 1173 /* solve phase */ 1174 /*-------------*/ 1175 mumps->id.job = JOB_SOLVE; 1176 PetscMUMPS_c(mumps); 1177 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1178 1179 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1180 ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr); 1181 ierr = VecPlaceArray(v_mpi,array);CHKERRQ(ierr); 1182 1183 /* create scatter scat_sol */ 1184 ierr = MatGetOwnershipRanges(X,&rstart);CHKERRQ(ierr); 1185 /* iidx: index for scatter mumps solution to petsc X */ 1186 1187 ierr = ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);CHKERRQ(ierr); 1188 ierr = PetscMalloc1(nlsol_loc,&idxx);CHKERRQ(ierr); 1189 for (i=0; i<lsol_loc; i++) { 1190 isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1191 1192 for (proc=0; proc<mumps->petsc_size; proc++){ 1193 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) { 1194 myrstart = rstart[proc]; 1195 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1196 iidx = k + myrstart*nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1197 m = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */ 1198 break; 1199 } 1200 } 1201 1202 for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m; 1203 } 1204 ierr = ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr); 1205 ierr = VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);CHKERRQ(ierr); 1206 ierr = VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1207 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1208 ierr = ISDestroy(&is_to);CHKERRQ(ierr); 1209 ierr = VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1210 ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr); 1211 1212 /* free spaces */ 1213 mumps->id.sol_loc = (MumpsScalar*)sol_loc_save; 1214 mumps->id.isol_loc = isol_loc_save; 1215 1216 ierr = PetscFree2(sol_loc,isol_loc);CHKERRQ(ierr); 1217 ierr = PetscFree(idxx);CHKERRQ(ierr); 1218 ierr = VecDestroy(&msol_loc);CHKERRQ(ierr); 1219 ierr = VecDestroy(&v_mpi);CHKERRQ(ierr); 1220 if (Bt) { 1221 if (!mumps->myid) { 1222 b = (Mat_MPIAIJ*)Bt->data; 1223 ierr = MatSeqAIJRestoreArray(b->A,&aa);CHKERRQ(ierr); 1224 ierr = MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1225 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 1226 } 1227 } else { 1228 ierr = VecDestroy(&b_seq);CHKERRQ(ierr); 1229 ierr = VecScatterDestroy(&scat_rhs);CHKERRQ(ierr); 1230 } 1231 ierr = VecScatterDestroy(&scat_sol);CHKERRQ(ierr); 1232 ierr = PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));CHKERRQ(ierr); 1233 PetscFunctionReturn(0); 1234 } 1235 1236 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X) 1237 { 1238 PetscErrorCode ierr; 1239 PetscBool flg; 1240 Mat B; 1241 1242 PetscFunctionBegin; 1243 ierr = PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 1244 if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix"); 1245 1246 /* Create B=Bt^T that uses Bt's data structure */ 1247 ierr = MatCreateTranspose(Bt,&B);CHKERRQ(ierr); 1248 1249 ierr = MatMatSolve_MUMPS(A,B,X);CHKERRQ(ierr); 1250 ierr = MatDestroy(&B);CHKERRQ(ierr); 1251 PetscFunctionReturn(0); 1252 } 1253 1254 #if !defined(PETSC_USE_COMPLEX) 1255 /* 1256 input: 1257 F: numeric factor 1258 output: 1259 nneg: total number of negative pivots 1260 nzero: total number of zero pivots 1261 npos: (global dimension of F) - nneg - nzero 1262 */ 1263 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos) 1264 { 1265 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1266 PetscErrorCode ierr; 1267 PetscMPIInt size; 1268 1269 PetscFunctionBegin; 1270 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRQ(ierr); 1271 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1272 if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",mumps->id.INFOG(13)); 1273 1274 if (nneg) *nneg = mumps->id.INFOG(12); 1275 if (nzero || npos) { 1276 if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1277 if (nzero) *nzero = mumps->id.INFOG(28); 1278 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1279 } 1280 PetscFunctionReturn(0); 1281 } 1282 #endif 1283 1284 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps) 1285 { 1286 PetscErrorCode ierr; 1287 PetscInt i,nz=0,*irn,*jcn=0; 1288 PetscScalar *val=0; 1289 PetscMPIInt mpinz,*recvcount=NULL,*displs=NULL; 1290 1291 PetscFunctionBegin; 1292 if (mumps->omp_comm_size > 1) { 1293 if (reuse == MAT_INITIAL_MATRIX) { 1294 /* master first gathers counts of nonzeros to receive */ 1295 if (mumps->is_omp_master) { ierr = PetscMalloc2(mumps->omp_comm_size,&recvcount,mumps->omp_comm_size,&displs);CHKERRQ(ierr); } 1296 ierr = PetscMPIIntCast(mumps->nz,&mpinz);CHKERRQ(ierr); 1297 ierr = MPI_Gather(&mpinz,1,MPI_INT,recvcount,1,MPI_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1298 1299 /* master allocates memory to receive nonzeros */ 1300 if (mumps->is_omp_master) { 1301 displs[0] = 0; 1302 for (i=1; i<mumps->omp_comm_size; i++) displs[i] = displs[i-1] + recvcount[i-1]; 1303 nz = displs[mumps->omp_comm_size-1] + recvcount[mumps->omp_comm_size-1]; 1304 ierr = PetscMalloc(2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar),&irn);CHKERRQ(ierr); 1305 jcn = irn + nz; 1306 val = (PetscScalar*)(jcn + nz); 1307 } 1308 1309 /* save the gatherv plan */ 1310 mumps->mpinz = mpinz; /* used as send count */ 1311 mumps->recvcount = recvcount; 1312 mumps->displs = displs; 1313 1314 /* master gathers nonzeros */ 1315 ierr = MPI_Gatherv(mumps->irn,mpinz,MPIU_INT,irn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1316 ierr = MPI_Gatherv(mumps->jcn,mpinz,MPIU_INT,jcn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1317 ierr = MPI_Gatherv(mumps->val,mpinz,MPIU_SCALAR,val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1318 1319 /* master frees its row/col/val and replaces them with bigger arrays */ 1320 if (mumps->is_omp_master) { 1321 ierr = PetscFree(mumps->irn);CHKERRQ(ierr); /* irn/jcn/val are allocated together so free only irn */ 1322 mumps->nz = nz; /* it is a sum of mpinz over omp_comm */ 1323 mumps->irn = irn; 1324 mumps->jcn = jcn; 1325 mumps->val = val; 1326 } 1327 } else { 1328 ierr = MPI_Gatherv((mumps->is_omp_master?MPI_IN_PLACE:mumps->val),mumps->mpinz,MPIU_SCALAR,mumps->val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1329 } 1330 } 1331 PetscFunctionReturn(0); 1332 } 1333 1334 PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info) 1335 { 1336 Mat_MUMPS *mumps =(Mat_MUMPS*)(F)->data; 1337 PetscErrorCode ierr; 1338 PetscBool isMPIAIJ; 1339 1340 PetscFunctionBegin; 1341 if (mumps->id.INFOG(1) < 0) { 1342 if (mumps->id.INFOG(1) == -6) { 1343 ierr = PetscInfo2(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1344 } 1345 ierr = PetscInfo2(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1346 PetscFunctionReturn(0); 1347 } 1348 1349 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1350 ierr = MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);CHKERRQ(ierr); 1351 1352 /* numerical factorization phase */ 1353 /*-------------------------------*/ 1354 mumps->id.job = JOB_FACTNUMERIC; 1355 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1356 if (!mumps->myid) { 1357 mumps->id.a = (MumpsScalar*)mumps->val; 1358 } 1359 } else { 1360 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1361 } 1362 PetscMUMPS_c(mumps); 1363 if (mumps->id.INFOG(1) < 0) { 1364 if (A->erroriffailure) { 1365 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 1366 } else { 1367 if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */ 1368 ierr = PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1369 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1370 } else if (mumps->id.INFOG(1) == -13) { 1371 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1372 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1373 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10) ) { 1374 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray \n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1375 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1376 } else { 1377 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1378 F->factorerrortype = MAT_FACTOR_OTHER; 1379 } 1380 } 1381 } 1382 if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d\n",mumps->id.INFOG(16)); 1383 1384 F->assembled = PETSC_TRUE; 1385 mumps->matstruc = SAME_NONZERO_PATTERN; 1386 if (F->schur) { /* reset Schur status to unfactored */ 1387 #if defined(PETSC_HAVE_CUDA) 1388 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 1389 #endif 1390 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1391 mumps->id.ICNTL(19) = 2; 1392 ierr = MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);CHKERRQ(ierr); 1393 } 1394 ierr = MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);CHKERRQ(ierr); 1395 } 1396 1397 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 1398 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 1399 1400 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 1401 if (mumps->petsc_size > 1) { 1402 PetscInt lsol_loc; 1403 PetscScalar *sol_loc; 1404 1405 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr); 1406 1407 /* distributed solution; Create x_seq=sol_loc for repeated use */ 1408 if (mumps->x_seq) { 1409 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 1410 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 1411 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 1412 } 1413 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1414 ierr = PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);CHKERRQ(ierr); 1415 mumps->id.lsol_loc = lsol_loc; 1416 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1417 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr); 1418 } 1419 ierr = PetscLogFlops(mumps->id.RINFO(2));CHKERRQ(ierr); 1420 PetscFunctionReturn(0); 1421 } 1422 1423 /* Sets MUMPS options from the options database */ 1424 PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A) 1425 { 1426 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1427 PetscErrorCode ierr; 1428 PetscInt icntl,info[80],i,ninfo=80; 1429 PetscBool flg; 1430 1431 PetscFunctionBegin; 1432 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 1433 ierr = PetscOptionsInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr); 1434 if (flg) mumps->id.ICNTL(1) = icntl; 1435 ierr = PetscOptionsInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr); 1436 if (flg) mumps->id.ICNTL(2) = icntl; 1437 ierr = PetscOptionsInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr); 1438 if (flg) mumps->id.ICNTL(3) = icntl; 1439 1440 ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr); 1441 if (flg) mumps->id.ICNTL(4) = icntl; 1442 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 1443 1444 ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr); 1445 if (flg) mumps->id.ICNTL(6) = icntl; 1446 1447 ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr); 1448 if (flg) { 1449 if (icntl== 1 && mumps->petsc_size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n"); 1450 else mumps->id.ICNTL(7) = icntl; 1451 } 1452 1453 ierr = PetscOptionsInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr); 1454 /* ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL);CHKERRQ(ierr); handled by MatSolveTranspose_MUMPS() */ 1455 ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr); 1456 ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr); 1457 ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr); 1458 ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr); 1459 ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr); 1460 ierr = PetscOptionsInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr); 1461 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 1462 ierr = MatDestroy(&F->schur);CHKERRQ(ierr); 1463 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 1464 } 1465 /* ierr = PetscOptionsInt("-mat_mumps_icntl_20","ICNTL(20): the format (dense or sparse) of the right-hand sides","None",mumps->id.ICNTL(20),&mumps->id.ICNTL(20),NULL);CHKERRQ(ierr); -- sparse rhs is not supported in PETSc API */ 1466 /* ierr = PetscOptionsInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL);CHKERRQ(ierr); we only use distributed solution vector */ 1467 1468 ierr = PetscOptionsInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr); 1469 ierr = PetscOptionsInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr); 1470 ierr = PetscOptionsInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr); 1471 if (mumps->id.ICNTL(24)) { 1472 mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ 1473 } 1474 1475 ierr = PetscOptionsInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr); 1476 ierr = PetscOptionsInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr); 1477 ierr = PetscOptionsInt("-mat_mumps_icntl_27","ICNTL(27): the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr); 1478 ierr = PetscOptionsInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr); 1479 ierr = PetscOptionsInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr); 1480 /* ierr = PetscOptionsInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */ 1481 ierr = PetscOptionsInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr); 1482 /* ierr = PetscOptionsInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr); -- not supported by PETSc API */ 1483 ierr = PetscOptionsInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr); 1484 ierr = PetscOptionsInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr); 1485 ierr = PetscOptionsInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);CHKERRQ(ierr); 1486 ierr = PetscOptionsInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);CHKERRQ(ierr); 1487 1488 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr); 1489 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr); 1490 ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr); 1491 ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr); 1492 ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr); 1493 ierr = PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);CHKERRQ(ierr); 1494 1495 ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, 256, NULL);CHKERRQ(ierr); 1496 1497 ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr); 1498 if (ninfo) { 1499 if (ninfo > 80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %d must <= 80\n",ninfo); 1500 ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr); 1501 mumps->ninfo = ninfo; 1502 for (i=0; i<ninfo; i++) { 1503 if (info[i] < 0 || info[i]>80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %d must between 1 and 80\n",ninfo); 1504 else mumps->info[i] = info[i]; 1505 } 1506 } 1507 1508 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1509 PetscFunctionReturn(0); 1510 } 1511 1512 PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps) 1513 { 1514 PetscErrorCode ierr; 1515 PetscInt nthreads=0; 1516 1517 PetscFunctionBegin; 1518 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1519 ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRQ(ierr); 1520 ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRQ(ierr); /* so that code like "if (!myid)" still works even if mumps_comm is different */ 1521 1522 ierr = PetscOptionsHasName(NULL,NULL,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr); 1523 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1524 ierr = PetscOptionsGetInt(NULL,NULL,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr); 1525 if (mumps->use_petsc_omp_support) { 1526 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1527 ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr); 1528 ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr); 1529 #else 1530 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -mat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual\n"); 1531 #endif 1532 } else { 1533 mumps->omp_comm = PETSC_COMM_SELF; 1534 mumps->mumps_comm = mumps->petsc_comm; 1535 mumps->is_omp_master = PETSC_TRUE; 1536 } 1537 ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRQ(ierr); 1538 1539 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 1540 mumps->id.job = JOB_INIT; 1541 mumps->id.par = 1; /* host participates factorizaton and solve */ 1542 mumps->id.sym = mumps->sym; 1543 1544 PetscMUMPS_c(mumps); 1545 1546 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 1547 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 1548 */ 1549 ierr = MPI_Bcast(mumps->id.icntl,60,MPIU_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 Manual Section 9 */ 1550 ierr = MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); 1551 1552 mumps->scat_rhs = NULL; 1553 mumps->scat_sol = NULL; 1554 1555 /* set PETSc-MUMPS default options - override MUMPS default */ 1556 mumps->id.ICNTL(3) = 0; 1557 mumps->id.ICNTL(4) = 0; 1558 if (mumps->petsc_size == 1) { 1559 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 1560 } else { 1561 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 1562 mumps->id.ICNTL(20) = 0; /* rhs is in dense format */ 1563 mumps->id.ICNTL(21) = 1; /* distributed solution */ 1564 } 1565 1566 /* schur */ 1567 mumps->id.size_schur = 0; 1568 mumps->id.listvar_schur = NULL; 1569 mumps->id.schur = NULL; 1570 mumps->sizeredrhs = 0; 1571 mumps->schur_sol = NULL; 1572 mumps->schur_sizesol = 0; 1573 PetscFunctionReturn(0); 1574 } 1575 1576 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps) 1577 { 1578 PetscErrorCode ierr; 1579 1580 PetscFunctionBegin; 1581 ierr = MPI_Bcast(mumps->id.infog, 80,MPIU_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 manual p82 */ 1582 ierr = MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); 1583 if (mumps->id.INFOG(1) < 0) { 1584 if (A->erroriffailure) { 1585 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1586 } else { 1587 if (mumps->id.INFOG(1) == -6) { 1588 ierr = PetscInfo2(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1589 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 1590 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 1591 ierr = PetscInfo2(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1592 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1593 } else { 1594 ierr = PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1595 F->factorerrortype = MAT_FACTOR_OTHER; 1596 } 1597 } 1598 } 1599 PetscFunctionReturn(0); 1600 } 1601 1602 /* Note Petsc r(=c) permutation is used when mumps->id.ICNTL(7)==1 with centralized assembled matrix input; otherwise r and c are ignored */ 1603 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1604 { 1605 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1606 PetscErrorCode ierr; 1607 Vec b; 1608 const PetscInt M = A->rmap->N; 1609 1610 PetscFunctionBegin; 1611 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1612 1613 /* Set MUMPS options from the options database */ 1614 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1615 1616 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1617 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1618 1619 /* analysis phase */ 1620 /*----------------*/ 1621 mumps->id.job = JOB_FACTSYMBOLIC; 1622 mumps->id.n = M; 1623 switch (mumps->id.ICNTL(18)) { 1624 case 0: /* centralized assembled matrix input */ 1625 if (!mumps->myid) { 1626 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1627 if (mumps->id.ICNTL(6)>1) { 1628 mumps->id.a = (MumpsScalar*)mumps->val; 1629 } 1630 if (mumps->id.ICNTL(7) == 1) { /* use user-provide matrix ordering - assuming r = c ordering */ 1631 /* 1632 PetscBool flag; 1633 ierr = ISEqual(r,c,&flag);CHKERRQ(ierr); 1634 if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"row_perm != col_perm"); 1635 ierr = ISView(r,PETSC_VIEWER_STDOUT_SELF); 1636 */ 1637 if (!mumps->myid) { 1638 const PetscInt *idx; 1639 PetscInt i,*perm_in; 1640 1641 ierr = PetscMalloc1(M,&perm_in);CHKERRQ(ierr); 1642 ierr = ISGetIndices(r,&idx);CHKERRQ(ierr); 1643 1644 mumps->id.perm_in = perm_in; 1645 for (i=0; i<M; i++) perm_in[i] = idx[i]+1; /* perm_in[]: start from 1, not 0! */ 1646 ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr); 1647 } 1648 } 1649 } 1650 break; 1651 case 3: /* distributed assembled matrix input (size>1) */ 1652 mumps->id.nz_loc = mumps->nz; 1653 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1654 if (mumps->id.ICNTL(6)>1) { 1655 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1656 } 1657 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1658 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1659 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1660 ierr = VecDestroy(&b);CHKERRQ(ierr); 1661 break; 1662 } 1663 PetscMUMPS_c(mumps); 1664 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1665 1666 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1667 F->ops->solve = MatSolve_MUMPS; 1668 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1669 F->ops->matsolve = MatMatSolve_MUMPS; 1670 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1671 PetscFunctionReturn(0); 1672 } 1673 1674 /* Note the Petsc r and c permutations are ignored */ 1675 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1676 { 1677 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1678 PetscErrorCode ierr; 1679 Vec b; 1680 const PetscInt M = A->rmap->N; 1681 1682 PetscFunctionBegin; 1683 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1684 1685 /* Set MUMPS options from the options database */ 1686 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1687 1688 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1689 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1690 1691 /* analysis phase */ 1692 /*----------------*/ 1693 mumps->id.job = JOB_FACTSYMBOLIC; 1694 mumps->id.n = M; 1695 switch (mumps->id.ICNTL(18)) { 1696 case 0: /* centralized assembled matrix input */ 1697 if (!mumps->myid) { 1698 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1699 if (mumps->id.ICNTL(6)>1) { 1700 mumps->id.a = (MumpsScalar*)mumps->val; 1701 } 1702 } 1703 break; 1704 case 3: /* distributed assembled matrix input (size>1) */ 1705 mumps->id.nz_loc = mumps->nz; 1706 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1707 if (mumps->id.ICNTL(6)>1) { 1708 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1709 } 1710 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1711 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1712 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1713 ierr = VecDestroy(&b);CHKERRQ(ierr); 1714 break; 1715 } 1716 PetscMUMPS_c(mumps); 1717 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1718 1719 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1720 F->ops->solve = MatSolve_MUMPS; 1721 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1722 PetscFunctionReturn(0); 1723 } 1724 1725 /* Note the Petsc r permutation and factor info are ignored */ 1726 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info) 1727 { 1728 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1729 PetscErrorCode ierr; 1730 Vec b; 1731 const PetscInt M = A->rmap->N; 1732 1733 PetscFunctionBegin; 1734 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1735 1736 /* Set MUMPS options from the options database */ 1737 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1738 1739 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1740 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1741 1742 /* analysis phase */ 1743 /*----------------*/ 1744 mumps->id.job = JOB_FACTSYMBOLIC; 1745 mumps->id.n = M; 1746 switch (mumps->id.ICNTL(18)) { 1747 case 0: /* centralized assembled matrix input */ 1748 if (!mumps->myid) { 1749 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1750 if (mumps->id.ICNTL(6)>1) { 1751 mumps->id.a = (MumpsScalar*)mumps->val; 1752 } 1753 } 1754 break; 1755 case 3: /* distributed assembled matrix input (size>1) */ 1756 mumps->id.nz_loc = mumps->nz; 1757 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1758 if (mumps->id.ICNTL(6)>1) { 1759 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1760 } 1761 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1762 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1763 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1764 ierr = VecDestroy(&b);CHKERRQ(ierr); 1765 break; 1766 } 1767 PetscMUMPS_c(mumps); 1768 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1769 1770 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 1771 F->ops->solve = MatSolve_MUMPS; 1772 F->ops->solvetranspose = MatSolve_MUMPS; 1773 F->ops->matsolve = MatMatSolve_MUMPS; 1774 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1775 #if defined(PETSC_USE_COMPLEX) 1776 F->ops->getinertia = NULL; 1777 #else 1778 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 1779 #endif 1780 PetscFunctionReturn(0); 1781 } 1782 1783 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) 1784 { 1785 PetscErrorCode ierr; 1786 PetscBool iascii; 1787 PetscViewerFormat format; 1788 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1789 1790 PetscFunctionBegin; 1791 /* check if matrix is mumps type */ 1792 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0); 1793 1794 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1795 if (iascii) { 1796 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1797 if (format == PETSC_VIEWER_ASCII_INFO) { 1798 ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr); 1799 ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",mumps->id.sym);CHKERRQ(ierr); 1800 ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",mumps->id.par);CHKERRQ(ierr); 1801 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d \n",mumps->id.ICNTL(1));CHKERRQ(ierr); 1802 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d \n",mumps->id.ICNTL(2));CHKERRQ(ierr); 1803 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d \n",mumps->id.ICNTL(3));CHKERRQ(ierr); 1804 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",mumps->id.ICNTL(4));CHKERRQ(ierr); 1805 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",mumps->id.ICNTL(5));CHKERRQ(ierr); 1806 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",mumps->id.ICNTL(6));CHKERRQ(ierr); 1807 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d \n",mumps->id.ICNTL(7));CHKERRQ(ierr); 1808 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d \n",mumps->id.ICNTL(8));CHKERRQ(ierr); 1809 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",mumps->id.ICNTL(10));CHKERRQ(ierr); 1810 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",mumps->id.ICNTL(11));CHKERRQ(ierr); 1811 if (mumps->id.ICNTL(11)>0) { 1812 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr); 1813 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr); 1814 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6));CHKERRQ(ierr); 1815 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr); 1816 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr); 1817 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr); 1818 } 1819 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",mumps->id.ICNTL(12));CHKERRQ(ierr); 1820 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (efficiency control): %d \n",mumps->id.ICNTL(13));CHKERRQ(ierr); 1821 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",mumps->id.ICNTL(14));CHKERRQ(ierr); 1822 /* ICNTL(15-17) not used */ 1823 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",mumps->id.ICNTL(18));CHKERRQ(ierr); 1824 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d \n",mumps->id.ICNTL(19));CHKERRQ(ierr); 1825 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(20) (rhs sparse pattern): %d \n",mumps->id.ICNTL(20));CHKERRQ(ierr); 1826 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d \n",mumps->id.ICNTL(21));CHKERRQ(ierr); 1827 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d \n",mumps->id.ICNTL(22));CHKERRQ(ierr); 1828 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory can be allocated locally):%d \n",mumps->id.ICNTL(23));CHKERRQ(ierr); 1829 1830 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d \n",mumps->id.ICNTL(24));CHKERRQ(ierr); 1831 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d \n",mumps->id.ICNTL(25));CHKERRQ(ierr); 1832 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for rhs or solution): %d \n",mumps->id.ICNTL(26));CHKERRQ(ierr); 1833 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(27) (experimental parameter): %d \n",mumps->id.ICNTL(27));CHKERRQ(ierr); 1834 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d \n",mumps->id.ICNTL(28));CHKERRQ(ierr); 1835 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d \n",mumps->id.ICNTL(29));CHKERRQ(ierr); 1836 1837 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d \n",mumps->id.ICNTL(30));CHKERRQ(ierr); 1838 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(31) (factors is discarded in the solve phase): %d \n",mumps->id.ICNTL(31));CHKERRQ(ierr); 1839 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d \n",mumps->id.ICNTL(33));CHKERRQ(ierr); 1840 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d \n",mumps->id.ICNTL(35));CHKERRQ(ierr); 1841 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(36) (choice of BLR factorization variant): %d \n",mumps->id.ICNTL(36));CHKERRQ(ierr); 1842 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(38) (estimated compression rate of LU factors): %d \n",mumps->id.ICNTL(38));CHKERRQ(ierr); 1843 1844 ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1));CHKERRQ(ierr); 1845 ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr); 1846 ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3));CHKERRQ(ierr); 1847 ierr = PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4));CHKERRQ(ierr); 1848 ierr = PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5));CHKERRQ(ierr); 1849 ierr = PetscViewerASCIIPrintf(viewer," CNTL(7) (dropping parameter for BLR): %g \n",mumps->id.CNTL(7));CHKERRQ(ierr); 1850 1851 /* infomation local to each processor */ 1852 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr); 1853 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1854 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr); 1855 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1856 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr); 1857 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr); 1858 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1859 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr); 1860 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr); 1861 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1862 1863 ierr = PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr); 1864 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr); 1865 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1866 1867 ierr = PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr); 1868 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr); 1869 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1870 1871 ierr = PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr); 1872 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr); 1873 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1874 1875 if (mumps->ninfo && mumps->ninfo <= 80){ 1876 PetscInt i; 1877 for (i=0; i<mumps->ninfo; i++){ 1878 ierr = PetscViewerASCIIPrintf(viewer, " INFO(%d): \n",mumps->info[i]);CHKERRQ(ierr); 1879 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(mumps->info[i]));CHKERRQ(ierr); 1880 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1881 } 1882 } 1883 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1884 1885 if (!mumps->myid) { /* information from the host */ 1886 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr); 1887 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr); 1888 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr); 1889 ierr = PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr); 1890 1891 ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(3));CHKERRQ(ierr); 1892 ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(4));CHKERRQ(ierr); 1893 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",mumps->id.INFOG(5));CHKERRQ(ierr); 1894 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",mumps->id.INFOG(6));CHKERRQ(ierr); 1895 ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively use after analysis): %d \n",mumps->id.INFOG(7));CHKERRQ(ierr); 1896 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",mumps->id.INFOG(8));CHKERRQ(ierr); 1897 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",mumps->id.INFOG(9));CHKERRQ(ierr); 1898 ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d \n",mumps->id.INFOG(10));CHKERRQ(ierr); 1899 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d \n",mumps->id.INFOG(11));CHKERRQ(ierr); 1900 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",mumps->id.INFOG(12));CHKERRQ(ierr); 1901 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",mumps->id.INFOG(13));CHKERRQ(ierr); 1902 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",mumps->id.INFOG(14));CHKERRQ(ierr); 1903 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",mumps->id.INFOG(15));CHKERRQ(ierr); 1904 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",mumps->id.INFOG(16));CHKERRQ(ierr); 1905 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",mumps->id.INFOG(17));CHKERRQ(ierr); 1906 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",mumps->id.INFOG(18));CHKERRQ(ierr); 1907 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",mumps->id.INFOG(19));CHKERRQ(ierr); 1908 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",mumps->id.INFOG(20));CHKERRQ(ierr); 1909 ierr = PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",mumps->id.INFOG(21));CHKERRQ(ierr); 1910 ierr = PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",mumps->id.INFOG(22));CHKERRQ(ierr); 1911 ierr = PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",mumps->id.INFOG(23));CHKERRQ(ierr); 1912 ierr = PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",mumps->id.INFOG(24));CHKERRQ(ierr); 1913 ierr = PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",mumps->id.INFOG(25));CHKERRQ(ierr); 1914 ierr = PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));CHKERRQ(ierr); 1915 ierr = PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));CHKERRQ(ierr); 1916 ierr = PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));CHKERRQ(ierr); 1917 ierr = PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));CHKERRQ(ierr); 1918 ierr = PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));CHKERRQ(ierr); 1919 ierr = PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));CHKERRQ(ierr); 1920 ierr = PetscViewerASCIIPrintf(viewer," INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));CHKERRQ(ierr); 1921 ierr = PetscViewerASCIIPrintf(viewer," INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(36));CHKERRQ(ierr); 1922 ierr = PetscViewerASCIIPrintf(viewer," INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d \n",mumps->id.INFOG(37));CHKERRQ(ierr); 1923 ierr = PetscViewerASCIIPrintf(viewer," INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(38));CHKERRQ(ierr); 1924 ierr = PetscViewerASCIIPrintf(viewer," INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d \n",mumps->id.INFOG(39));CHKERRQ(ierr); 1925 } 1926 } 1927 } 1928 PetscFunctionReturn(0); 1929 } 1930 1931 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info) 1932 { 1933 Mat_MUMPS *mumps =(Mat_MUMPS*)A->data; 1934 1935 PetscFunctionBegin; 1936 info->block_size = 1.0; 1937 info->nz_allocated = mumps->id.INFOG(20); 1938 info->nz_used = mumps->id.INFOG(20); 1939 info->nz_unneeded = 0.0; 1940 info->assemblies = 0.0; 1941 info->mallocs = 0.0; 1942 info->memory = 0.0; 1943 info->fill_ratio_given = 0; 1944 info->fill_ratio_needed = 0; 1945 info->factor_mallocs = 0; 1946 PetscFunctionReturn(0); 1947 } 1948 1949 /* -------------------------------------------------------------------------------------------*/ 1950 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 1951 { 1952 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1953 const PetscScalar *arr; 1954 const PetscInt *idxs; 1955 PetscInt size,i; 1956 PetscErrorCode ierr; 1957 1958 PetscFunctionBegin; 1959 ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr); 1960 if (mumps->petsc_size > 1) { 1961 PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */ 1962 1963 ls = mumps->myid ? (size ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 1964 ierr = MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);CHKERRQ(ierr); 1965 if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc\n"); 1966 } 1967 1968 /* Schur complement matrix */ 1969 ierr = MatDestroy(&F->schur);CHKERRQ(ierr); 1970 ierr = MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur);CHKERRQ(ierr); 1971 ierr = MatDenseGetArrayRead(F->schur,&arr);CHKERRQ(ierr); 1972 mumps->id.schur = (MumpsScalar*)arr; 1973 mumps->id.size_schur = size; 1974 mumps->id.schur_lld = size; 1975 ierr = MatDenseRestoreArrayRead(F->schur,&arr);CHKERRQ(ierr); 1976 if (mumps->sym == 1) { 1977 ierr = MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 1978 } 1979 1980 /* MUMPS expects Fortran style indices */ 1981 ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr); 1982 ierr = PetscMalloc1(size,&mumps->id.listvar_schur);CHKERRQ(ierr); 1983 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 1984 ierr = PetscArraycpy(mumps->id.listvar_schur,idxs,size);CHKERRQ(ierr); 1985 for (i=0;i<size;i++) mumps->id.listvar_schur[i]++; 1986 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 1987 if (mumps->petsc_size > 1) { 1988 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 1989 } else { 1990 if (F->factortype == MAT_FACTOR_LU) { 1991 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 1992 } else { 1993 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 1994 } 1995 } 1996 /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 1997 mumps->id.ICNTL(26) = -1; 1998 PetscFunctionReturn(0); 1999 } 2000 2001 /* -------------------------------------------------------------------------------------------*/ 2002 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S) 2003 { 2004 Mat St; 2005 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2006 PetscScalar *array; 2007 #if defined(PETSC_USE_COMPLEX) 2008 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 2009 #endif 2010 PetscErrorCode ierr; 2011 2012 PetscFunctionBegin; 2013 if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 2014 ierr = MatCreate(PETSC_COMM_SELF,&St);CHKERRQ(ierr); 2015 ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);CHKERRQ(ierr); 2016 ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr); 2017 ierr = MatSetUp(St);CHKERRQ(ierr); 2018 ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr); 2019 if (!mumps->sym) { /* MUMPS always return a full matrix */ 2020 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2021 PetscInt i,j,N=mumps->id.size_schur; 2022 for (i=0;i<N;i++) { 2023 for (j=0;j<N;j++) { 2024 #if !defined(PETSC_USE_COMPLEX) 2025 PetscScalar val = mumps->id.schur[i*N+j]; 2026 #else 2027 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2028 #endif 2029 array[j*N+i] = val; 2030 } 2031 } 2032 } else { /* stored by columns */ 2033 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 2034 } 2035 } else { /* either full or lower-triangular (not packed) */ 2036 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2037 PetscInt i,j,N=mumps->id.size_schur; 2038 for (i=0;i<N;i++) { 2039 for (j=i;j<N;j++) { 2040 #if !defined(PETSC_USE_COMPLEX) 2041 PetscScalar val = mumps->id.schur[i*N+j]; 2042 #else 2043 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2044 #endif 2045 array[i*N+j] = val; 2046 array[j*N+i] = val; 2047 } 2048 } 2049 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2050 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 2051 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2052 PetscInt i,j,N=mumps->id.size_schur; 2053 for (i=0;i<N;i++) { 2054 for (j=0;j<i+1;j++) { 2055 #if !defined(PETSC_USE_COMPLEX) 2056 PetscScalar val = mumps->id.schur[i*N+j]; 2057 #else 2058 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2059 #endif 2060 array[i*N+j] = val; 2061 array[j*N+i] = val; 2062 } 2063 } 2064 } 2065 } 2066 ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr); 2067 *S = St; 2068 PetscFunctionReturn(0); 2069 } 2070 2071 /* -------------------------------------------------------------------------------------------*/ 2072 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival) 2073 { 2074 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2075 2076 PetscFunctionBegin; 2077 mumps->id.ICNTL(icntl) = ival; 2078 PetscFunctionReturn(0); 2079 } 2080 2081 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival) 2082 { 2083 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2084 2085 PetscFunctionBegin; 2086 *ival = mumps->id.ICNTL(icntl); 2087 PetscFunctionReturn(0); 2088 } 2089 2090 /*@ 2091 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2092 2093 Logically Collective on Mat 2094 2095 Input Parameters: 2096 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2097 . icntl - index of MUMPS parameter array ICNTL() 2098 - ival - value of MUMPS ICNTL(icntl) 2099 2100 Options Database: 2101 . -mat_mumps_icntl_<icntl> <ival> 2102 2103 Level: beginner 2104 2105 References: 2106 . MUMPS Users' Guide 2107 2108 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2109 @*/ 2110 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival) 2111 { 2112 PetscErrorCode ierr; 2113 2114 PetscFunctionBegin; 2115 PetscValidType(F,1); 2116 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2117 PetscValidLogicalCollectiveInt(F,icntl,2); 2118 PetscValidLogicalCollectiveInt(F,ival,3); 2119 ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr); 2120 PetscFunctionReturn(0); 2121 } 2122 2123 /*@ 2124 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2125 2126 Logically Collective on Mat 2127 2128 Input Parameters: 2129 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2130 - icntl - index of MUMPS parameter array ICNTL() 2131 2132 Output Parameter: 2133 . ival - value of MUMPS ICNTL(icntl) 2134 2135 Level: beginner 2136 2137 References: 2138 . MUMPS Users' Guide 2139 2140 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2141 @*/ 2142 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival) 2143 { 2144 PetscErrorCode ierr; 2145 2146 PetscFunctionBegin; 2147 PetscValidType(F,1); 2148 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2149 PetscValidLogicalCollectiveInt(F,icntl,2); 2150 PetscValidIntPointer(ival,3); 2151 ierr = PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2152 PetscFunctionReturn(0); 2153 } 2154 2155 /* -------------------------------------------------------------------------------------------*/ 2156 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val) 2157 { 2158 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2159 2160 PetscFunctionBegin; 2161 mumps->id.CNTL(icntl) = val; 2162 PetscFunctionReturn(0); 2163 } 2164 2165 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val) 2166 { 2167 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2168 2169 PetscFunctionBegin; 2170 *val = mumps->id.CNTL(icntl); 2171 PetscFunctionReturn(0); 2172 } 2173 2174 /*@ 2175 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2176 2177 Logically Collective on Mat 2178 2179 Input Parameters: 2180 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2181 . icntl - index of MUMPS parameter array CNTL() 2182 - val - value of MUMPS CNTL(icntl) 2183 2184 Options Database: 2185 . -mat_mumps_cntl_<icntl> <val> 2186 2187 Level: beginner 2188 2189 References: 2190 . MUMPS Users' Guide 2191 2192 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2193 @*/ 2194 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val) 2195 { 2196 PetscErrorCode ierr; 2197 2198 PetscFunctionBegin; 2199 PetscValidType(F,1); 2200 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2201 PetscValidLogicalCollectiveInt(F,icntl,2); 2202 PetscValidLogicalCollectiveReal(F,val,3); 2203 ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr); 2204 PetscFunctionReturn(0); 2205 } 2206 2207 /*@ 2208 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2209 2210 Logically Collective on Mat 2211 2212 Input Parameters: 2213 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2214 - icntl - index of MUMPS parameter array CNTL() 2215 2216 Output Parameter: 2217 . val - value of MUMPS CNTL(icntl) 2218 2219 Level: beginner 2220 2221 References: 2222 . MUMPS Users' Guide 2223 2224 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2225 @*/ 2226 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val) 2227 { 2228 PetscErrorCode ierr; 2229 2230 PetscFunctionBegin; 2231 PetscValidType(F,1); 2232 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2233 PetscValidLogicalCollectiveInt(F,icntl,2); 2234 PetscValidRealPointer(val,3); 2235 ierr = PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2236 PetscFunctionReturn(0); 2237 } 2238 2239 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info) 2240 { 2241 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2242 2243 PetscFunctionBegin; 2244 *info = mumps->id.INFO(icntl); 2245 PetscFunctionReturn(0); 2246 } 2247 2248 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog) 2249 { 2250 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2251 2252 PetscFunctionBegin; 2253 *infog = mumps->id.INFOG(icntl); 2254 PetscFunctionReturn(0); 2255 } 2256 2257 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo) 2258 { 2259 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2260 2261 PetscFunctionBegin; 2262 *rinfo = mumps->id.RINFO(icntl); 2263 PetscFunctionReturn(0); 2264 } 2265 2266 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog) 2267 { 2268 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2269 2270 PetscFunctionBegin; 2271 *rinfog = mumps->id.RINFOG(icntl); 2272 PetscFunctionReturn(0); 2273 } 2274 2275 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS) 2276 { 2277 PetscErrorCode ierr; 2278 Mat Bt = NULL,Btseq = NULL; 2279 PetscBool flg; 2280 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2281 PetscScalar *aa; 2282 PetscInt spnr,*ia,*ja,M,nrhs; 2283 2284 PetscFunctionBegin; 2285 PetscValidIntPointer(spRHS,2); 2286 ierr = PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);CHKERRQ(ierr); 2287 if (flg) { 2288 ierr = MatTransposeGetMat(spRHS,&Bt);CHKERRQ(ierr); 2289 } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be type MATTRANSPOSEMAT matrix"); 2290 2291 ierr = MatMumpsSetIcntl(F,30,1);CHKERRQ(ierr); 2292 2293 if (mumps->petsc_size > 1) { 2294 Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data; 2295 Btseq = b->A; 2296 } else { 2297 Btseq = Bt; 2298 } 2299 2300 ierr = MatGetSize(spRHS,&M,&nrhs);CHKERRQ(ierr); 2301 mumps->id.nrhs = nrhs; 2302 mumps->id.lrhs = M; 2303 mumps->id.rhs = NULL; 2304 2305 if (!mumps->myid) { 2306 ierr = MatSeqAIJGetArray(Btseq,&aa);CHKERRQ(ierr); 2307 ierr = MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2308 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2309 2310 mumps->id.irhs_ptr = ia; 2311 mumps->id.irhs_sparse = ja; 2312 mumps->id.nz_rhs = ia[spnr] - 1; 2313 mumps->id.rhs_sparse = (MumpsScalar*)aa; 2314 } else { 2315 mumps->id.irhs_ptr = NULL; 2316 mumps->id.irhs_sparse = NULL; 2317 mumps->id.nz_rhs = 0; 2318 mumps->id.rhs_sparse = NULL; 2319 } 2320 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2321 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2322 2323 /* solve phase */ 2324 /*-------------*/ 2325 mumps->id.job = JOB_SOLVE; 2326 PetscMUMPS_c(mumps); 2327 if (mumps->id.INFOG(1) < 0) 2328 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 2329 2330 if (!mumps->myid) { 2331 ierr = MatSeqAIJRestoreArray(Btseq,&aa);CHKERRQ(ierr); 2332 ierr = MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2333 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2334 } 2335 PetscFunctionReturn(0); 2336 } 2337 2338 /*@ 2339 MatMumpsGetInverse - Get user-specified set of entries in inverse of A 2340 2341 Logically Collective on Mat 2342 2343 Input Parameters: 2344 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2345 - spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0] 2346 2347 Output Parameter: 2348 . spRHS - requested entries of inverse of A 2349 2350 Level: beginner 2351 2352 References: 2353 . MUMPS Users' Guide 2354 2355 .seealso: MatGetFactor(), MatCreateTranspose() 2356 @*/ 2357 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS) 2358 { 2359 PetscErrorCode ierr; 2360 2361 PetscFunctionBegin; 2362 PetscValidType(F,1); 2363 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2364 ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr); 2365 PetscFunctionReturn(0); 2366 } 2367 2368 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST) 2369 { 2370 PetscErrorCode ierr; 2371 Mat spRHS; 2372 2373 PetscFunctionBegin; 2374 ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr); 2375 ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr); 2376 ierr = MatDestroy(&spRHS);CHKERRQ(ierr); 2377 PetscFunctionReturn(0); 2378 } 2379 2380 /*@ 2381 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T 2382 2383 Logically Collective on Mat 2384 2385 Input Parameters: 2386 + F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface 2387 - spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0] 2388 2389 Output Parameter: 2390 . spRHST - requested entries of inverse of A^T 2391 2392 Level: beginner 2393 2394 References: 2395 . MUMPS Users' Guide 2396 2397 .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse() 2398 @*/ 2399 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST) 2400 { 2401 PetscErrorCode ierr; 2402 PetscBool flg; 2403 2404 PetscFunctionBegin; 2405 PetscValidType(F,1); 2406 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2407 ierr = PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 2408 if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix"); 2409 2410 ierr = PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));CHKERRQ(ierr); 2411 PetscFunctionReturn(0); 2412 } 2413 2414 /*@ 2415 MatMumpsGetInfo - Get MUMPS parameter INFO() 2416 2417 Logically Collective on Mat 2418 2419 Input Parameters: 2420 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2421 - icntl - index of MUMPS parameter array INFO() 2422 2423 Output Parameter: 2424 . ival - value of MUMPS INFO(icntl) 2425 2426 Level: beginner 2427 2428 References: 2429 . MUMPS Users' Guide 2430 2431 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2432 @*/ 2433 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival) 2434 { 2435 PetscErrorCode ierr; 2436 2437 PetscFunctionBegin; 2438 PetscValidType(F,1); 2439 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2440 PetscValidIntPointer(ival,3); 2441 ierr = PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2442 PetscFunctionReturn(0); 2443 } 2444 2445 /*@ 2446 MatMumpsGetInfog - Get MUMPS parameter INFOG() 2447 2448 Logically Collective on Mat 2449 2450 Input Parameters: 2451 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2452 - icntl - index of MUMPS parameter array INFOG() 2453 2454 Output Parameter: 2455 . ival - value of MUMPS INFOG(icntl) 2456 2457 Level: beginner 2458 2459 References: 2460 . MUMPS Users' Guide 2461 2462 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2463 @*/ 2464 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival) 2465 { 2466 PetscErrorCode ierr; 2467 2468 PetscFunctionBegin; 2469 PetscValidType(F,1); 2470 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2471 PetscValidIntPointer(ival,3); 2472 ierr = PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2473 PetscFunctionReturn(0); 2474 } 2475 2476 /*@ 2477 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 2478 2479 Logically Collective on Mat 2480 2481 Input Parameters: 2482 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2483 - icntl - index of MUMPS parameter array RINFO() 2484 2485 Output Parameter: 2486 . val - value of MUMPS RINFO(icntl) 2487 2488 Level: beginner 2489 2490 References: 2491 . MUMPS Users' Guide 2492 2493 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2494 @*/ 2495 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val) 2496 { 2497 PetscErrorCode ierr; 2498 2499 PetscFunctionBegin; 2500 PetscValidType(F,1); 2501 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2502 PetscValidRealPointer(val,3); 2503 ierr = PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2504 PetscFunctionReturn(0); 2505 } 2506 2507 /*@ 2508 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 2509 2510 Logically Collective on Mat 2511 2512 Input Parameters: 2513 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2514 - icntl - index of MUMPS parameter array RINFOG() 2515 2516 Output Parameter: 2517 . val - value of MUMPS RINFOG(icntl) 2518 2519 Level: beginner 2520 2521 References: 2522 . MUMPS Users' Guide 2523 2524 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2525 @*/ 2526 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val) 2527 { 2528 PetscErrorCode ierr; 2529 2530 PetscFunctionBegin; 2531 PetscValidType(F,1); 2532 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2533 PetscValidRealPointer(val,3); 2534 ierr = PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2535 PetscFunctionReturn(0); 2536 } 2537 2538 /*MC 2539 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 2540 distributed and sequential matrices via the external package MUMPS. 2541 2542 Works with MATAIJ and MATSBAIJ matrices 2543 2544 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 2545 2546 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below. 2547 2548 Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver 2549 2550 Options Database Keys: 2551 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 2552 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 2553 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 2554 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 2555 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 2556 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis 2557 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 2558 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 2559 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 2560 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 2561 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 2562 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 2563 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 2564 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 2565 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can allocate per processor 2566 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 2567 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 2568 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix 2569 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering 2570 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 2571 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 2572 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 2573 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 2574 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 2575 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 2576 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 2577 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 2578 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 2579 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 2580 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 2581 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 2582 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 2583 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 2584 Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 2585 2586 Level: beginner 2587 2588 Notes: 2589 MUMPS Cholesky does not handle (complex) Hermitian matrices http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf so using it will error if the matrix is Hermitian. 2590 2591 When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling 2592 $ KSPGetPC(ksp,&pc); 2593 $ PCFactorGetMatrix(pc,&mat); 2594 $ MatMumpsGetInfo(mat,....); 2595 $ MatMumpsGetInfog(mat,....); etc. 2596 Or you can run with -ksp_error_if_not_converged and the program will be stopped and the information printed in the error message. 2597 2598 Two modes to run MUMPS/PETSc with OpenMP 2599 2600 $ Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 2601 $ threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". 2602 2603 $ -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example, 2604 $ if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 2605 2606 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 2607 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc 2608 (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 2609 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 2610 (PETSc will automatically try to utilized a threaded BLAS if --with-openmp is provided). 2611 2612 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 2613 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 2614 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 2615 are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set 2616 by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs. 2617 In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets, 2618 if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind 2619 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half 2620 cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 2621 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 2622 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 2623 examine the mapping result. 2624 2625 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts, 2626 for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc 2627 calls omp_set_num_threads(m) internally before calling MUMPS. 2628 2629 References: 2630 + 1. - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 2631 - 2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017. 2632 2633 .seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCGetFactor(), PCFactorGetMatrix() 2634 2635 M*/ 2636 2637 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type) 2638 { 2639 PetscFunctionBegin; 2640 *type = MATSOLVERMUMPS; 2641 PetscFunctionReturn(0); 2642 } 2643 2644 /* MatGetFactor for Seq and MPI AIJ matrices */ 2645 static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F) 2646 { 2647 Mat B; 2648 PetscErrorCode ierr; 2649 Mat_MUMPS *mumps; 2650 PetscBool isSeqAIJ; 2651 2652 PetscFunctionBegin; 2653 /* Create the factorization matrix */ 2654 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr); 2655 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2656 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2657 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2658 ierr = MatSetUp(B);CHKERRQ(ierr); 2659 2660 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2661 2662 B->ops->view = MatView_MUMPS; 2663 B->ops->getinfo = MatGetInfo_MUMPS; 2664 2665 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2666 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2667 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2668 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2669 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2670 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2671 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2672 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2673 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2674 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2675 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2676 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2677 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2678 2679 if (ftype == MAT_FACTOR_LU) { 2680 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 2681 B->factortype = MAT_FACTOR_LU; 2682 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 2683 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 2684 mumps->sym = 0; 2685 } else { 2686 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 2687 B->factortype = MAT_FACTOR_CHOLESKY; 2688 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 2689 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 2690 #if defined(PETSC_USE_COMPLEX) 2691 mumps->sym = 2; 2692 #else 2693 if (A->spd_set && A->spd) mumps->sym = 1; 2694 else mumps->sym = 2; 2695 #endif 2696 } 2697 2698 /* set solvertype */ 2699 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2700 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2701 2702 B->ops->destroy = MatDestroy_MUMPS; 2703 B->data = (void*)mumps; 2704 2705 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2706 2707 *F = B; 2708 PetscFunctionReturn(0); 2709 } 2710 2711 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 2712 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F) 2713 { 2714 Mat B; 2715 PetscErrorCode ierr; 2716 Mat_MUMPS *mumps; 2717 PetscBool isSeqSBAIJ; 2718 2719 PetscFunctionBegin; 2720 if (ftype != MAT_FACTOR_CHOLESKY) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Cannot use PETSc SBAIJ matrices with MUMPS LU, use AIJ matrix"); 2721 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr); 2722 /* Create the factorization matrix */ 2723 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2724 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2725 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2726 ierr = MatSetUp(B);CHKERRQ(ierr); 2727 2728 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2729 if (isSeqSBAIJ) { 2730 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 2731 } else { 2732 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 2733 } 2734 2735 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 2736 B->ops->view = MatView_MUMPS; 2737 B->ops->getinfo = MatGetInfo_MUMPS; 2738 2739 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2740 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2741 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2742 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2743 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2744 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2745 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2746 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2747 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2748 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2749 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2750 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2751 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2752 2753 B->factortype = MAT_FACTOR_CHOLESKY; 2754 #if defined(PETSC_USE_COMPLEX) 2755 mumps->sym = 2; 2756 #else 2757 if (A->spd_set && A->spd) mumps->sym = 1; 2758 else mumps->sym = 2; 2759 #endif 2760 2761 /* set solvertype */ 2762 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2763 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2764 2765 B->ops->destroy = MatDestroy_MUMPS; 2766 B->data = (void*)mumps; 2767 2768 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2769 2770 *F = B; 2771 PetscFunctionReturn(0); 2772 } 2773 2774 static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F) 2775 { 2776 Mat B; 2777 PetscErrorCode ierr; 2778 Mat_MUMPS *mumps; 2779 PetscBool isSeqBAIJ; 2780 2781 PetscFunctionBegin; 2782 /* Create the factorization matrix */ 2783 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr); 2784 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2785 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2786 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2787 ierr = MatSetUp(B);CHKERRQ(ierr); 2788 2789 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2790 if (ftype == MAT_FACTOR_LU) { 2791 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 2792 B->factortype = MAT_FACTOR_LU; 2793 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 2794 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 2795 mumps->sym = 0; 2796 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead\n"); 2797 2798 B->ops->view = MatView_MUMPS; 2799 B->ops->getinfo = MatGetInfo_MUMPS; 2800 2801 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2802 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2803 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2804 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2805 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2806 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2807 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2808 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2809 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2810 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2811 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2812 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2813 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2814 2815 /* set solvertype */ 2816 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2817 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2818 2819 B->ops->destroy = MatDestroy_MUMPS; 2820 B->data = (void*)mumps; 2821 2822 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2823 2824 *F = B; 2825 PetscFunctionReturn(0); 2826 } 2827 2828 /* MatGetFactor for Seq and MPI SELL matrices */ 2829 static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F) 2830 { 2831 Mat B; 2832 PetscErrorCode ierr; 2833 Mat_MUMPS *mumps; 2834 PetscBool isSeqSELL; 2835 2836 PetscFunctionBegin; 2837 /* Create the factorization matrix */ 2838 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);CHKERRQ(ierr); 2839 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2840 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2841 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2842 ierr = MatSetUp(B);CHKERRQ(ierr); 2843 2844 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2845 2846 B->ops->view = MatView_MUMPS; 2847 B->ops->getinfo = MatGetInfo_MUMPS; 2848 2849 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2850 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2851 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2852 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2853 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2854 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2855 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2856 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2857 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2858 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2859 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2860 2861 if (ftype == MAT_FACTOR_LU) { 2862 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 2863 B->factortype = MAT_FACTOR_LU; 2864 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 2865 else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 2866 mumps->sym = 0; 2867 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 2868 2869 /* set solvertype */ 2870 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2871 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2872 2873 B->ops->destroy = MatDestroy_MUMPS; 2874 B->data = (void*)mumps; 2875 2876 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2877 2878 *F = B; 2879 PetscFunctionReturn(0); 2880 } 2881 2882 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 2883 { 2884 PetscErrorCode ierr; 2885 2886 PetscFunctionBegin; 2887 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2888 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2889 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2890 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2891 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 2892 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2893 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2894 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2895 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2896 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 2897 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);CHKERRQ(ierr); 2898 PetscFunctionReturn(0); 2899 } 2900 2901