1 2 /* 3 Provides an interface to the MUMPS sparse solver 4 */ 5 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 8 #include <../src/mat/impls/sell/mpi/mpisell.h> 9 10 EXTERN_C_BEGIN 11 #if defined(PETSC_USE_COMPLEX) 12 #if defined(PETSC_USE_REAL_SINGLE) 13 #include <cmumps_c.h> 14 #else 15 #include <zmumps_c.h> 16 #endif 17 #else 18 #if defined(PETSC_USE_REAL_SINGLE) 19 #include <smumps_c.h> 20 #else 21 #include <dmumps_c.h> 22 #endif 23 #endif 24 EXTERN_C_END 25 #define JOB_INIT -1 26 #define JOB_FACTSYMBOLIC 1 27 #define JOB_FACTNUMERIC 2 28 #define JOB_SOLVE 3 29 #define JOB_END -2 30 31 /* calls to MUMPS */ 32 #if defined(PETSC_USE_COMPLEX) 33 #if defined(PETSC_USE_REAL_SINGLE) 34 #define MUMPS_c cmumps_c 35 #else 36 #define MUMPS_c zmumps_c 37 #endif 38 #else 39 #if defined(PETSC_USE_REAL_SINGLE) 40 #define MUMPS_c smumps_c 41 #else 42 #define MUMPS_c dmumps_c 43 #endif 44 #endif 45 46 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */ 47 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 48 #define PetscMUMPS_c(mumps) \ 49 do { \ 50 if (mumps->use_petsc_omp_support) { \ 51 if (mumps->is_omp_master) { \ 52 ierr = PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl);CHKERRQ(ierr); \ 53 MUMPS_c(&mumps->id); \ 54 ierr = PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl);CHKERRQ(ierr); \ 55 } \ 56 ierr = PetscOmpCtrlBarrier(mumps->omp_ctrl);CHKERRQ(ierr); \ 57 } else { \ 58 MUMPS_c(&mumps->id); \ 59 } \ 60 } while(0) 61 #else 62 #define PetscMUMPS_c(mumps) \ 63 do { MUMPS_c(&mumps->id); } while (0) 64 #endif 65 66 /* declare MumpsScalar */ 67 #if defined(PETSC_USE_COMPLEX) 68 #if defined(PETSC_USE_REAL_SINGLE) 69 #define MumpsScalar mumps_complex 70 #else 71 #define MumpsScalar mumps_double_complex 72 #endif 73 #else 74 #define MumpsScalar PetscScalar 75 #endif 76 77 /* macros s.t. indices match MUMPS documentation */ 78 #define ICNTL(I) icntl[(I)-1] 79 #define CNTL(I) cntl[(I)-1] 80 #define INFOG(I) infog[(I)-1] 81 #define INFO(I) info[(I)-1] 82 #define RINFOG(I) rinfog[(I)-1] 83 #define RINFO(I) rinfo[(I)-1] 84 85 typedef struct { 86 #if defined(PETSC_USE_COMPLEX) 87 #if defined(PETSC_USE_REAL_SINGLE) 88 CMUMPS_STRUC_C id; 89 #else 90 ZMUMPS_STRUC_C id; 91 #endif 92 #else 93 #if defined(PETSC_USE_REAL_SINGLE) 94 SMUMPS_STRUC_C id; 95 #else 96 DMUMPS_STRUC_C id; 97 #endif 98 #endif 99 100 MatStructure matstruc; 101 PetscMPIInt myid,petsc_size; 102 PetscInt *irn,*jcn,nz,sym; 103 PetscScalar *val; 104 MPI_Comm mumps_comm; 105 PetscInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 106 VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 107 Vec b_seq,x_seq; 108 PetscInt ninfo,*info; /* display INFO */ 109 PetscInt sizeredrhs; 110 PetscScalar *schur_sol; 111 PetscInt schur_sizesol; 112 113 PetscBool use_petsc_omp_support; 114 PetscOmpCtrl omp_ctrl; /* an OpenMP controler that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 115 MPI_Comm petsc_comm,omp_comm; /* petsc_comm is petsc matrix's comm */ 116 PetscMPIInt mpinz; /* on master rank, nz = sum(mpinz) over omp_comm; on other ranks, mpinz = nz*/ 117 PetscMPIInt omp_comm_size; 118 PetscBool is_omp_master; /* is this rank the master of omp_comm */ 119 PetscMPIInt *recvcount,*displs; 120 121 PetscErrorCode (*ConvertToTriples)(Mat, int, MatReuse, int*, int**, int**, PetscScalar**); 122 } Mat_MUMPS; 123 124 extern PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*); 125 126 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps) 127 { 128 PetscErrorCode ierr; 129 130 PetscFunctionBegin; 131 ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr); 132 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 133 ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr); 134 mumps->id.size_schur = 0; 135 mumps->id.schur_lld = 0; 136 mumps->id.ICNTL(19) = 0; 137 PetscFunctionReturn(0); 138 } 139 140 /* solve with rhs in mumps->id.redrhs and return in the same location */ 141 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 142 { 143 Mat_MUMPS *mumps=(Mat_MUMPS*)F->data; 144 Mat S,B,X; 145 MatFactorSchurStatus schurstatus; 146 PetscInt sizesol; 147 PetscErrorCode ierr; 148 149 PetscFunctionBegin; 150 ierr = MatFactorFactorizeSchurComplement(F);CHKERRQ(ierr); 151 ierr = MatFactorGetSchurComplement(F,&S,&schurstatus);CHKERRQ(ierr); 152 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);CHKERRQ(ierr); 153 ierr = MatSetType(B,((PetscObject)S)->type_name);CHKERRQ(ierr); 154 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 155 ierr = MatPinToCPU(B,S->pinnedtocpu);CHKERRQ(ierr); 156 #endif 157 switch (schurstatus) { 158 case MAT_FACTOR_SCHUR_FACTORED: 159 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);CHKERRQ(ierr); 160 ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr); 161 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 162 ierr = MatPinToCPU(X,S->pinnedtocpu);CHKERRQ(ierr); 163 #endif 164 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 165 ierr = MatMatSolveTranspose(S,B,X);CHKERRQ(ierr); 166 } else { 167 ierr = MatMatSolve(S,B,X);CHKERRQ(ierr); 168 } 169 break; 170 case MAT_FACTOR_SCHUR_INVERTED: 171 sizesol = mumps->id.nrhs*mumps->id.size_schur; 172 if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 173 ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr); 174 ierr = PetscMalloc1(sizesol,&mumps->schur_sol);CHKERRQ(ierr); 175 mumps->schur_sizesol = sizesol; 176 } 177 ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);CHKERRQ(ierr); 178 ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr); 179 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 180 ierr = MatPinToCPU(X,S->pinnedtocpu);CHKERRQ(ierr); 181 #endif 182 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 183 ierr = MatTransposeMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr); 184 } else { 185 ierr = MatMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr); 186 } 187 ierr = MatCopy(X,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 188 break; 189 default: 190 SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %D",F->schur_status); 191 break; 192 } 193 ierr = MatFactorRestoreSchurComplement(F,&S,schurstatus);CHKERRQ(ierr); 194 ierr = MatDestroy(&B);CHKERRQ(ierr); 195 ierr = MatDestroy(&X);CHKERRQ(ierr); 196 PetscFunctionReturn(0); 197 } 198 199 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 200 { 201 Mat_MUMPS *mumps=(Mat_MUMPS*)F->data; 202 PetscErrorCode ierr; 203 204 PetscFunctionBegin; 205 if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 206 PetscFunctionReturn(0); 207 } 208 if (!expansion) { /* prepare for the condensation step */ 209 PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur; 210 /* allocate MUMPS internal array to store reduced right-hand sides */ 211 if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 212 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 213 mumps->id.lredrhs = mumps->id.size_schur; 214 ierr = PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);CHKERRQ(ierr); 215 mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs; 216 } 217 mumps->id.ICNTL(26) = 1; /* condensation phase */ 218 } else { /* prepare for the expansion step */ 219 /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */ 220 ierr = MatMumpsSolveSchur_Private(F);CHKERRQ(ierr); 221 mumps->id.ICNTL(26) = 2; /* expansion phase */ 222 PetscMUMPS_c(mumps); 223 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 224 /* restore defaults */ 225 mumps->id.ICNTL(26) = -1; 226 /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 227 if (mumps->id.nrhs > 1) { 228 ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr); 229 mumps->id.lredrhs = 0; 230 mumps->sizeredrhs = 0; 231 } 232 } 233 PetscFunctionReturn(0); 234 } 235 236 /* 237 MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz] 238 239 input: 240 A - matrix in aij,baij or sbaij format 241 shift - 0: C style output triple; 1: Fortran style output triple. 242 reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 243 MAT_REUSE_MATRIX: only the values in v array are updated 244 output: 245 nnz - dim of r, c, and v (number of local nonzero entries of A) 246 r, c, v - row and col index, matrix values (matrix triples) 247 248 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 249 freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 250 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 251 252 */ 253 254 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 255 { 256 const PetscScalar *av; 257 const PetscInt *ai,*aj,*ajj,M=A->rmap->n; 258 PetscInt nz,rnz,i,j; 259 PetscErrorCode ierr; 260 PetscInt *row,*col; 261 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 262 263 PetscFunctionBegin; 264 ierr = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr); 265 *v = (PetscScalar*)av; 266 if (reuse == MAT_INITIAL_MATRIX) { 267 nz = aa->nz; 268 ai = aa->i; 269 aj = aa->j; 270 *nnz = nz; 271 ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr); 272 col = row + nz; 273 274 nz = 0; 275 for (i=0; i<M; i++) { 276 rnz = ai[i+1] - ai[i]; 277 ajj = aj + ai[i]; 278 for (j=0; j<rnz; j++) { 279 row[nz] = i+shift; col[nz++] = ajj[j] + shift; 280 } 281 } 282 *r = row; *c = col; 283 } 284 ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr); 285 PetscFunctionReturn(0); 286 } 287 288 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 289 { 290 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 291 PetscInt *ptr; 292 293 PetscFunctionBegin; 294 *v = a->val; 295 if (reuse == MAT_INITIAL_MATRIX) { 296 PetscInt nz,i,j,row; 297 PetscErrorCode ierr; 298 299 nz = a->sliidx[a->totalslices]; 300 *nnz = nz; 301 ierr = PetscMalloc1(2*nz, &ptr);CHKERRQ(ierr); 302 *r = ptr; 303 *c = ptr + nz; 304 305 for (i=0; i<a->totalslices; i++) { 306 for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) { 307 *ptr++ = 8*i + row + shift; 308 } 309 } 310 for (i=0;i<nz;i++) *ptr++ = a->colidx[i] + shift; 311 } 312 PetscFunctionReturn(0); 313 } 314 315 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 316 { 317 Mat_SeqBAIJ *aa=(Mat_SeqBAIJ*)A->data; 318 const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2; 319 PetscInt bs,M,nz,idx=0,rnz,i,j,k,m; 320 PetscErrorCode ierr; 321 PetscInt *row,*col; 322 323 PetscFunctionBegin; 324 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 325 M = A->rmap->N/bs; 326 *v = aa->a; 327 if (reuse == MAT_INITIAL_MATRIX) { 328 ai = aa->i; aj = aa->j; 329 nz = bs2*aa->nz; 330 *nnz = nz; 331 ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr); 332 col = row + nz; 333 334 for (i=0; i<M; i++) { 335 ajj = aj + ai[i]; 336 rnz = ai[i+1] - ai[i]; 337 for (k=0; k<rnz; k++) { 338 for (j=0; j<bs; j++) { 339 for (m=0; m<bs; m++) { 340 row[idx] = i*bs + m + shift; 341 col[idx++] = bs*(ajj[k]) + j + shift; 342 } 343 } 344 } 345 } 346 *r = row; *c = col; 347 } 348 PetscFunctionReturn(0); 349 } 350 351 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r,int **c,PetscScalar **v) 352 { 353 const PetscInt *ai, *aj,*ajj; 354 PetscInt nz,rnz,i,j,k,m,bs; 355 PetscErrorCode ierr; 356 PetscInt *row,*col; 357 PetscScalar *val; 358 Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)A->data; 359 const PetscInt bs2=aa->bs2,mbs=aa->mbs; 360 #if defined(PETSC_USE_COMPLEX) 361 PetscBool hermitian; 362 #endif 363 364 PetscFunctionBegin; 365 #if defined(PETSC_USE_COMPLEX) 366 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 367 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 368 #endif 369 ai = aa->i; 370 aj = aa->j; 371 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 372 if (reuse == MAT_INITIAL_MATRIX) { 373 nz = aa->nz; 374 ierr = PetscMalloc((2*bs2*nz*sizeof(PetscInt)+(bs>1?bs2*nz*sizeof(PetscScalar):0)), &row);CHKERRQ(ierr); 375 col = row + bs2*nz; 376 if (bs>1) val = (PetscScalar*)(col + bs2*nz); 377 else val = aa->a; 378 379 *r = row; *c = col; *v = val; 380 } else { 381 if (bs == 1) *v = aa->a; 382 row = *r; col = *c; val = *v; 383 } 384 385 nz = 0; 386 if (bs>1) { 387 for (i=0; i<mbs; i++) { 388 rnz = ai[i+1] - ai[i]; 389 ajj = aj + ai[i]; 390 for (j=0; j<rnz; j++) { 391 for (k=0; k<bs; k++) { 392 for (m=0; m<bs; m++) { 393 if (ajj[j]>i || k>=m) { 394 if (reuse == MAT_INITIAL_MATRIX) { 395 row[nz] = i*bs + m + shift; 396 col[nz] = ajj[j]*bs + k + shift; 397 } 398 val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs]; 399 } 400 } 401 } 402 } 403 } 404 } else if (reuse == MAT_INITIAL_MATRIX) { 405 for (i=0; i<mbs; i++) { 406 rnz = ai[i+1] - ai[i]; 407 ajj = aj + ai[i]; 408 for (j=0; j<rnz; j++) { 409 row[nz] = i+shift; col[nz++] = ajj[j] + shift; 410 } 411 } 412 if (nz != aa->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %D != %D",nz,aa->nz); 413 } 414 if (reuse == MAT_INITIAL_MATRIX) *nnz = nz; 415 PetscFunctionReturn(0); 416 } 417 418 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 419 { 420 const PetscInt *ai,*aj,*ajj,*adiag,M=A->rmap->n; 421 PetscInt nz,rnz,i,j; 422 const PetscScalar *av,*v1; 423 PetscScalar *val; 424 PetscErrorCode ierr; 425 PetscInt *row,*col; 426 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 427 PetscBool missing; 428 #if defined(PETSC_USE_COMPLEX) 429 PetscBool hermitian; 430 #endif 431 432 PetscFunctionBegin; 433 #if defined(PETSC_USE_COMPLEX) 434 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 435 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 436 #endif 437 ierr = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr); 438 ai = aa->i; aj = aa->j; 439 adiag = aa->diag; 440 ierr = MatMissingDiagonal_SeqAIJ(A,&missing,&i);CHKERRQ(ierr); 441 if (reuse == MAT_INITIAL_MATRIX) { 442 /* count nz in the upper triangular part of A */ 443 nz = 0; 444 if (missing) { 445 for (i=0; i<M; i++) { 446 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 447 for (j=ai[i];j<ai[i+1];j++) { 448 if (aj[j] < i) continue; 449 nz++; 450 } 451 } else { 452 nz += ai[i+1] - adiag[i]; 453 } 454 } 455 } else { 456 for (i=0; i<M; i++) nz += ai[i+1] - adiag[i]; 457 } 458 *nnz = nz; 459 460 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 461 col = row + nz; 462 val = (PetscScalar*)(col + nz); 463 464 nz = 0; 465 if (missing) { 466 for (i=0; i<M; i++) { 467 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 468 for (j=ai[i];j<ai[i+1];j++) { 469 if (aj[j] < i) continue; 470 row[nz] = i+shift; 471 col[nz] = aj[j]+shift; 472 val[nz] = av[j]; 473 nz++; 474 } 475 } else { 476 rnz = ai[i+1] - adiag[i]; 477 ajj = aj + adiag[i]; 478 v1 = av + adiag[i]; 479 for (j=0; j<rnz; j++) { 480 row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j]; 481 } 482 } 483 } 484 } else { 485 for (i=0; i<M; i++) { 486 rnz = ai[i+1] - adiag[i]; 487 ajj = aj + adiag[i]; 488 v1 = av + adiag[i]; 489 for (j=0; j<rnz; j++) { 490 row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j]; 491 } 492 } 493 } 494 *r = row; *c = col; *v = val; 495 } else { 496 nz = 0; val = *v; 497 if (missing) { 498 for (i=0; i <M; i++) { 499 if (PetscUnlikely(adiag[i] >= ai[i+1])) { 500 for (j=ai[i];j<ai[i+1];j++) { 501 if (aj[j] < i) continue; 502 val[nz++] = av[j]; 503 } 504 } else { 505 rnz = ai[i+1] - adiag[i]; 506 v1 = av + adiag[i]; 507 for (j=0; j<rnz; j++) { 508 val[nz++] = v1[j]; 509 } 510 } 511 } 512 } else { 513 for (i=0; i <M; i++) { 514 rnz = ai[i+1] - adiag[i]; 515 v1 = av + adiag[i]; 516 for (j=0; j<rnz; j++) { 517 val[nz++] = v1[j]; 518 } 519 } 520 } 521 } 522 ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr); 523 PetscFunctionReturn(0); 524 } 525 526 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r,int **c, PetscScalar **v) 527 { 528 const PetscInt *ai,*aj,*bi,*bj,*garray,*ajj,*bjj; 529 PetscErrorCode ierr; 530 PetscInt rstart,nz,bs,i,j,k,m,jj,irow,countA,countB; 531 PetscInt *row,*col; 532 const PetscScalar *av,*bv,*v1,*v2; 533 PetscScalar *val; 534 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data; 535 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)(mat->A)->data; 536 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 537 const PetscInt bs2=aa->bs2,mbs=aa->mbs; 538 #if defined(PETSC_USE_COMPLEX) 539 PetscBool hermitian; 540 #endif 541 542 PetscFunctionBegin; 543 #if defined(PETSC_USE_COMPLEX) 544 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 545 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 546 #endif 547 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 548 rstart = A->rmap->rstart; 549 ai = aa->i; 550 aj = aa->j; 551 bi = bb->i; 552 bj = bb->j; 553 av = aa->a; 554 bv = bb->a; 555 556 garray = mat->garray; 557 558 if (reuse == MAT_INITIAL_MATRIX) { 559 nz = aa->nz + bb->nz; 560 ierr = PetscMalloc((2*bs2*nz*sizeof(PetscInt)+bs2*nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 561 col = row + bs2*nz; 562 val = (PetscScalar*)(col + bs2*nz); 563 564 *r = row; *c = col; *v = val; 565 } else { 566 row = *r; col = *c; val = *v; 567 } 568 569 jj = 0; irow = rstart; 570 for (i=0; i<mbs; i++) { 571 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 572 countA = ai[i+1] - ai[i]; 573 countB = bi[i+1] - bi[i]; 574 bjj = bj + bi[i]; 575 v1 = av + ai[i]*bs2; 576 v2 = bv + bi[i]*bs2; 577 578 if (bs>1) { 579 /* A-part */ 580 for (j=0; j<countA; j++) { 581 for (k=0; k<bs; k++) { 582 for (m=0; m<bs; m++) { 583 if (rstart + ajj[j]*bs>irow || k>=m) { 584 if (reuse == MAT_INITIAL_MATRIX) { 585 row[jj] = irow + m + shift; col[jj] = rstart + ajj[j]*bs + k + shift; 586 } 587 val[jj++] = v1[j*bs2 + m + k*bs]; 588 } 589 } 590 } 591 } 592 593 /* B-part */ 594 for (j=0; j < countB; j++) { 595 for (k=0; k<bs; k++) { 596 for (m=0; m<bs; m++) { 597 if (reuse == MAT_INITIAL_MATRIX) { 598 row[jj] = irow + m + shift; col[jj] = garray[bjj[j]]*bs + k + shift; 599 } 600 val[jj++] = v2[j*bs2 + m + k*bs]; 601 } 602 } 603 } 604 } else { 605 /* A-part */ 606 for (j=0; j<countA; j++) { 607 if (reuse == MAT_INITIAL_MATRIX) { 608 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 609 } 610 val[jj++] = v1[j]; 611 } 612 613 /* B-part */ 614 for (j=0; j < countB; j++) { 615 if (reuse == MAT_INITIAL_MATRIX) { 616 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 617 } 618 val[jj++] = v2[j]; 619 } 620 } 621 irow+=bs; 622 } 623 *nnz = jj; 624 PetscFunctionReturn(0); 625 } 626 627 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 628 { 629 const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 630 PetscErrorCode ierr; 631 PetscInt rstart,nz,i,j,jj,irow,countA,countB; 632 PetscInt *row,*col; 633 const PetscScalar *av, *bv,*v1,*v2; 634 PetscScalar *val; 635 Mat Ad,Ao; 636 Mat_SeqAIJ *aa; 637 Mat_SeqAIJ *bb; 638 639 PetscFunctionBegin; 640 ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr); 641 ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr); 642 ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr); 643 644 aa = (Mat_SeqAIJ*)(Ad)->data; 645 bb = (Mat_SeqAIJ*)(Ao)->data; 646 ai = aa->i; 647 aj = aa->j; 648 bi = bb->i; 649 bj = bb->j; 650 651 rstart = A->rmap->rstart; 652 653 if (reuse == MAT_INITIAL_MATRIX) { 654 nz = aa->nz + bb->nz; 655 *nnz = nz; 656 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 657 col = row + nz; 658 val = (PetscScalar*)(col + nz); 659 660 *r = row; *c = col; *v = val; 661 } else { 662 row = *r; col = *c; val = *v; 663 } 664 665 jj = 0; irow = rstart; 666 for (i=0; i<m; i++) { 667 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 668 countA = ai[i+1] - ai[i]; 669 countB = bi[i+1] - bi[i]; 670 bjj = bj + bi[i]; 671 v1 = av + ai[i]; 672 v2 = bv + bi[i]; 673 674 /* A-part */ 675 for (j=0; j<countA; j++) { 676 if (reuse == MAT_INITIAL_MATRIX) { 677 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 678 } 679 val[jj++] = v1[j]; 680 } 681 682 /* B-part */ 683 for (j=0; j < countB; j++) { 684 if (reuse == MAT_INITIAL_MATRIX) { 685 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 686 } 687 val[jj++] = v2[j]; 688 } 689 irow++; 690 } 691 ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr); 692 ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr); 693 PetscFunctionReturn(0); 694 } 695 696 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 697 { 698 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)A->data; 699 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ*)(mat->A)->data; 700 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 701 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj; 702 const PetscInt *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart; 703 const PetscInt bs2=mat->bs2; 704 PetscErrorCode ierr; 705 PetscInt bs,nz,i,j,k,n,jj,irow,countA,countB,idx; 706 PetscInt *row,*col; 707 const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2; 708 PetscScalar *val; 709 710 PetscFunctionBegin; 711 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 712 if (reuse == MAT_INITIAL_MATRIX) { 713 nz = bs2*(aa->nz + bb->nz); 714 *nnz = nz; 715 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 716 col = row + nz; 717 val = (PetscScalar*)(col + nz); 718 719 *r = row; *c = col; *v = val; 720 } else { 721 row = *r; col = *c; val = *v; 722 } 723 724 jj = 0; irow = rstart; 725 for (i=0; i<mbs; i++) { 726 countA = ai[i+1] - ai[i]; 727 countB = bi[i+1] - bi[i]; 728 ajj = aj + ai[i]; 729 bjj = bj + bi[i]; 730 v1 = av + bs2*ai[i]; 731 v2 = bv + bs2*bi[i]; 732 733 idx = 0; 734 /* A-part */ 735 for (k=0; k<countA; k++) { 736 for (j=0; j<bs; j++) { 737 for (n=0; n<bs; n++) { 738 if (reuse == MAT_INITIAL_MATRIX) { 739 row[jj] = irow + n + shift; 740 col[jj] = rstart + bs*ajj[k] + j + shift; 741 } 742 val[jj++] = v1[idx++]; 743 } 744 } 745 } 746 747 idx = 0; 748 /* B-part */ 749 for (k=0; k<countB; k++) { 750 for (j=0; j<bs; j++) { 751 for (n=0; n<bs; n++) { 752 if (reuse == MAT_INITIAL_MATRIX) { 753 row[jj] = irow + n + shift; 754 col[jj] = bs*garray[bjj[k]] + j + shift; 755 } 756 val[jj++] = v2[idx++]; 757 } 758 } 759 } 760 irow += bs; 761 } 762 PetscFunctionReturn(0); 763 } 764 765 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v) 766 { 767 const PetscInt *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 768 PetscErrorCode ierr; 769 PetscInt rstart,nz,nza,nzb,i,j,jj,irow,countA,countB; 770 PetscInt *row,*col; 771 const PetscScalar *av, *bv,*v1,*v2; 772 PetscScalar *val; 773 Mat Ad,Ao; 774 Mat_SeqAIJ *aa; 775 Mat_SeqAIJ *bb; 776 #if defined(PETSC_USE_COMPLEX) 777 PetscBool hermitian; 778 #endif 779 780 PetscFunctionBegin; 781 #if defined(PETSC_USE_COMPLEX) 782 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 783 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy"); 784 #endif 785 ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr); 786 ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr); 787 ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr); 788 789 aa = (Mat_SeqAIJ*)(Ad)->data; 790 bb = (Mat_SeqAIJ*)(Ao)->data; 791 ai = aa->i; 792 aj = aa->j; 793 adiag = aa->diag; 794 bi = bb->i; 795 bj = bb->j; 796 797 rstart = A->rmap->rstart; 798 799 if (reuse == MAT_INITIAL_MATRIX) { 800 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 801 nzb = 0; /* num of upper triangular entries in mat->B */ 802 for (i=0; i<m; i++) { 803 nza += (ai[i+1] - adiag[i]); 804 countB = bi[i+1] - bi[i]; 805 bjj = bj + bi[i]; 806 for (j=0; j<countB; j++) { 807 if (garray[bjj[j]] > rstart) nzb++; 808 } 809 } 810 811 nz = nza + nzb; /* total nz of upper triangular part of mat */ 812 *nnz = nz; 813 ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr); 814 col = row + nz; 815 val = (PetscScalar*)(col + nz); 816 817 *r = row; *c = col; *v = val; 818 } else { 819 row = *r; col = *c; val = *v; 820 } 821 822 jj = 0; irow = rstart; 823 for (i=0; i<m; i++) { 824 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 825 v1 = av + adiag[i]; 826 countA = ai[i+1] - adiag[i]; 827 countB = bi[i+1] - bi[i]; 828 bjj = bj + bi[i]; 829 v2 = bv + bi[i]; 830 831 /* A-part */ 832 for (j=0; j<countA; j++) { 833 if (reuse == MAT_INITIAL_MATRIX) { 834 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 835 } 836 val[jj++] = v1[j]; 837 } 838 839 /* B-part */ 840 for (j=0; j < countB; j++) { 841 if (garray[bjj[j]] > rstart) { 842 if (reuse == MAT_INITIAL_MATRIX) { 843 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 844 } 845 val[jj++] = v2[j]; 846 } 847 } 848 irow++; 849 } 850 ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr); 851 ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr); 852 PetscFunctionReturn(0); 853 } 854 855 PetscErrorCode MatDestroy_MUMPS(Mat A) 856 { 857 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 858 PetscErrorCode ierr; 859 860 PetscFunctionBegin; 861 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 862 ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr); 863 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 864 ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr); 865 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 866 ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr); 867 ierr = PetscFree(mumps->irn);CHKERRQ(ierr); 868 ierr = PetscFree(mumps->info);CHKERRQ(ierr); 869 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 870 mumps->id.job = JOB_END; 871 PetscMUMPS_c(mumps); 872 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1)); 873 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 874 if (mumps->use_petsc_omp_support) { ierr = PetscOmpCtrlDestroy(&mumps->omp_ctrl);CHKERRQ(ierr); } 875 #endif 876 ierr = PetscFree2(mumps->recvcount,mumps->displs);CHKERRQ(ierr); 877 ierr = PetscFree(A->data);CHKERRQ(ierr); 878 879 /* clear composed functions */ 880 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr); 881 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr); 882 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr); 883 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr); 884 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);CHKERRQ(ierr); 885 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr); 886 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);CHKERRQ(ierr); 887 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);CHKERRQ(ierr); 888 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);CHKERRQ(ierr); 889 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);CHKERRQ(ierr); 890 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);CHKERRQ(ierr); 891 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);CHKERRQ(ierr); 892 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);CHKERRQ(ierr); 893 PetscFunctionReturn(0); 894 } 895 896 PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x) 897 { 898 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 899 PetscScalar *array; 900 Vec b_seq; 901 IS is_iden,is_petsc; 902 PetscErrorCode ierr; 903 PetscInt i; 904 PetscBool second_solve = PETSC_FALSE; 905 static PetscBool cite1 = PETSC_FALSE,cite2 = PETSC_FALSE; 906 907 PetscFunctionBegin; 908 ierr = PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1);CHKERRQ(ierr); 909 ierr = PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",&cite2);CHKERRQ(ierr); 910 911 if (A->factorerrortype) { 912 ierr = PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 913 ierr = VecSetInf(x);CHKERRQ(ierr); 914 PetscFunctionReturn(0); 915 } 916 917 mumps->id.ICNTL(20) = 0; /* dense RHS */ 918 mumps->id.nrhs = 1; 919 b_seq = mumps->b_seq; 920 if (mumps->petsc_size > 1) { 921 /* MUMPS only supports centralized rhs. Scatter b into a seqential rhs vector */ 922 ierr = VecScatterBegin(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 923 ierr = VecScatterEnd(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 924 if (!mumps->myid) {ierr = VecGetArray(b_seq,&array);CHKERRQ(ierr);} 925 } else { /* petsc_size == 1 */ 926 ierr = VecCopy(b,x);CHKERRQ(ierr); 927 ierr = VecGetArray(x,&array);CHKERRQ(ierr); 928 } 929 if (!mumps->myid) { /* define rhs on the host */ 930 mumps->id.nrhs = 1; 931 mumps->id.rhs = (MumpsScalar*)array; 932 } 933 934 /* 935 handle condensation step of Schur complement (if any) 936 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 937 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 938 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 939 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 940 */ 941 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 942 if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n"); 943 second_solve = PETSC_TRUE; 944 ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr); 945 } 946 /* solve phase */ 947 /*-------------*/ 948 mumps->id.job = JOB_SOLVE; 949 PetscMUMPS_c(mumps); 950 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 951 952 /* handle expansion step of Schur complement (if any) */ 953 if (second_solve) { 954 ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr); 955 } 956 957 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 958 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 959 /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 960 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 961 } 962 if (!mumps->scat_sol) { /* create scatter scat_sol */ 963 ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */ 964 for (i=0; i<mumps->id.lsol_loc; i++) { 965 mumps->id.isol_loc[i] -= 1; /* change Fortran style to C style */ 966 } 967 ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,mumps->id.isol_loc,PETSC_COPY_VALUES,&is_petsc);CHKERRQ(ierr); /* to */ 968 ierr = VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr); 969 ierr = ISDestroy(&is_iden);CHKERRQ(ierr); 970 ierr = ISDestroy(&is_petsc);CHKERRQ(ierr); 971 972 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 973 } 974 975 ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 976 ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 977 } 978 979 if (mumps->petsc_size > 1) {if (!mumps->myid) {ierr = VecRestoreArray(b_seq,&array);CHKERRQ(ierr);}} 980 else {ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);} 981 982 ierr = PetscLogFlops(2.0*mumps->id.RINFO(3));CHKERRQ(ierr); 983 PetscFunctionReturn(0); 984 } 985 986 PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x) 987 { 988 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 989 PetscErrorCode ierr; 990 991 PetscFunctionBegin; 992 mumps->id.ICNTL(9) = 0; 993 ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr); 994 mumps->id.ICNTL(9) = 1; 995 PetscFunctionReturn(0); 996 } 997 998 PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X) 999 { 1000 PetscErrorCode ierr; 1001 Mat Bt = NULL; 1002 PetscBool flg, flgT; 1003 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1004 PetscInt i,nrhs,M; 1005 PetscScalar *array; 1006 const PetscScalar *rbray; 1007 PetscInt lsol_loc,nlsol_loc,*isol_loc,*idxx,*isol_loc_save,iidx = 0; 1008 PetscScalar *bray,*sol_loc,*sol_loc_save; 1009 IS is_to,is_from; 1010 PetscInt k,proc,j,m,myrstart; 1011 const PetscInt *rstart; 1012 Vec v_mpi,b_seq,msol_loc; 1013 VecScatter scat_rhs,scat_sol; 1014 PetscScalar *aa; 1015 PetscInt spnr,*ia,*ja; 1016 Mat_MPIAIJ *b = NULL; 1017 1018 PetscFunctionBegin; 1019 ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 1020 if (!flg) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix"); 1021 1022 ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 1023 if (flg) { /* dense B */ 1024 if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution"); 1025 mumps->id.ICNTL(20)= 0; /* dense RHS */ 1026 } else { /* sparse B */ 1027 ierr = PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);CHKERRQ(ierr); 1028 if (flgT) { /* input B is transpose of actural RHS matrix, 1029 because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */ 1030 ierr = MatTransposeGetMat(B,&Bt);CHKERRQ(ierr); 1031 } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix"); 1032 mumps->id.ICNTL(20)= 1; /* sparse RHS */ 1033 } 1034 1035 ierr = MatGetSize(B,&M,&nrhs);CHKERRQ(ierr); 1036 mumps->id.nrhs = nrhs; 1037 mumps->id.lrhs = M; 1038 mumps->id.rhs = NULL; 1039 1040 if (mumps->petsc_size == 1) { 1041 PetscScalar *aa; 1042 PetscInt spnr,*ia,*ja; 1043 PetscBool second_solve = PETSC_FALSE; 1044 1045 ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr); 1046 mumps->id.rhs = (MumpsScalar*)array; 1047 1048 if (!Bt) { /* dense B */ 1049 /* copy B to X */ 1050 ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr); 1051 ierr = PetscArraycpy(array,rbray,M*nrhs);CHKERRQ(ierr); 1052 ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr); 1053 } else { /* sparse B */ 1054 ierr = MatSeqAIJGetArray(Bt,&aa);CHKERRQ(ierr); 1055 ierr = MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1056 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 1057 /* mumps requires ia and ja start at 1! */ 1058 mumps->id.irhs_ptr = ia; 1059 mumps->id.irhs_sparse = ja; 1060 mumps->id.nz_rhs = ia[spnr] - 1; 1061 mumps->id.rhs_sparse = (MumpsScalar*)aa; 1062 } 1063 /* handle condensation step of Schur complement (if any) */ 1064 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1065 second_solve = PETSC_TRUE; 1066 ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr); 1067 } 1068 /* solve phase */ 1069 /*-------------*/ 1070 mumps->id.job = JOB_SOLVE; 1071 PetscMUMPS_c(mumps); 1072 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1073 1074 /* handle expansion step of Schur complement (if any) */ 1075 if (second_solve) { 1076 ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr); 1077 } 1078 if (Bt) { /* sparse B */ 1079 ierr = MatSeqAIJRestoreArray(Bt,&aa);CHKERRQ(ierr); 1080 ierr = MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1081 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 1082 } 1083 ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr); 1084 PetscFunctionReturn(0); 1085 } 1086 1087 /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/ 1088 if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n"); 1089 1090 /* create msol_loc to hold mumps local solution */ 1091 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1092 sol_loc_save = (PetscScalar*)mumps->id.sol_loc; 1093 1094 lsol_loc = mumps->id.lsol_loc; 1095 nlsol_loc = nrhs*lsol_loc; /* length of sol_loc */ 1096 ierr = PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);CHKERRQ(ierr); 1097 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1098 mumps->id.isol_loc = isol_loc; 1099 1100 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);CHKERRQ(ierr); 1101 1102 if (!Bt) { /* dense B */ 1103 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1104 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1105 0, re-arrange B into desired order, which is a local operation. 1106 */ 1107 1108 /* scatter v_mpi to b_seq because MUMPS only supports centralized rhs */ 1109 /* wrap dense rhs matrix B into a vector v_mpi */ 1110 ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr); 1111 ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr); 1112 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr); 1113 ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr); 1114 1115 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */ 1116 if (!mumps->myid) { 1117 PetscInt *idx; 1118 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1119 ierr = PetscMalloc1(nrhs*M,&idx);CHKERRQ(ierr); 1120 ierr = MatGetOwnershipRanges(B,&rstart);CHKERRQ(ierr); 1121 k = 0; 1122 for (proc=0; proc<mumps->petsc_size; proc++){ 1123 for (j=0; j<nrhs; j++){ 1124 for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i; 1125 } 1126 } 1127 1128 ierr = VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);CHKERRQ(ierr); 1129 ierr = ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr); 1130 ierr = ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);CHKERRQ(ierr); 1131 } else { 1132 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);CHKERRQ(ierr); 1133 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);CHKERRQ(ierr); 1134 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);CHKERRQ(ierr); 1135 } 1136 ierr = VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);CHKERRQ(ierr); 1137 ierr = VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1138 ierr = ISDestroy(&is_to);CHKERRQ(ierr); 1139 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1140 ierr = VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1141 1142 if (!mumps->myid) { /* define rhs on the host */ 1143 ierr = VecGetArray(b_seq,&bray);CHKERRQ(ierr); 1144 mumps->id.rhs = (MumpsScalar*)bray; 1145 ierr = VecRestoreArray(b_seq,&bray);CHKERRQ(ierr); 1146 } 1147 1148 } else { /* sparse B */ 1149 b = (Mat_MPIAIJ*)Bt->data; 1150 1151 /* wrap dense X into a vector v_mpi */ 1152 ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr); 1153 ierr = MatDenseGetArray(X,&bray);CHKERRQ(ierr); 1154 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr); 1155 ierr = MatDenseRestoreArray(X,&bray);CHKERRQ(ierr); 1156 1157 if (!mumps->myid) { 1158 ierr = MatSeqAIJGetArray(b->A,&aa);CHKERRQ(ierr); 1159 ierr = MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1160 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 1161 /* mumps requires ia and ja start at 1! */ 1162 mumps->id.irhs_ptr = ia; 1163 mumps->id.irhs_sparse = ja; 1164 mumps->id.nz_rhs = ia[spnr] - 1; 1165 mumps->id.rhs_sparse = (MumpsScalar*)aa; 1166 } else { 1167 mumps->id.irhs_ptr = NULL; 1168 mumps->id.irhs_sparse = NULL; 1169 mumps->id.nz_rhs = 0; 1170 mumps->id.rhs_sparse = NULL; 1171 } 1172 } 1173 1174 /* solve phase */ 1175 /*-------------*/ 1176 mumps->id.job = JOB_SOLVE; 1177 PetscMUMPS_c(mumps); 1178 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1179 1180 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1181 ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr); 1182 ierr = VecPlaceArray(v_mpi,array);CHKERRQ(ierr); 1183 1184 /* create scatter scat_sol */ 1185 ierr = MatGetOwnershipRanges(X,&rstart);CHKERRQ(ierr); 1186 /* iidx: index for scatter mumps solution to petsc X */ 1187 1188 ierr = ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);CHKERRQ(ierr); 1189 ierr = PetscMalloc1(nlsol_loc,&idxx);CHKERRQ(ierr); 1190 for (i=0; i<lsol_loc; i++) { 1191 isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1192 1193 for (proc=0; proc<mumps->petsc_size; proc++){ 1194 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) { 1195 myrstart = rstart[proc]; 1196 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1197 iidx = k + myrstart*nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1198 m = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */ 1199 break; 1200 } 1201 } 1202 1203 for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m; 1204 } 1205 ierr = ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr); 1206 ierr = VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);CHKERRQ(ierr); 1207 ierr = VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1208 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1209 ierr = ISDestroy(&is_to);CHKERRQ(ierr); 1210 ierr = VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1211 ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr); 1212 1213 /* free spaces */ 1214 mumps->id.sol_loc = (MumpsScalar*)sol_loc_save; 1215 mumps->id.isol_loc = isol_loc_save; 1216 1217 ierr = PetscFree2(sol_loc,isol_loc);CHKERRQ(ierr); 1218 ierr = PetscFree(idxx);CHKERRQ(ierr); 1219 ierr = VecDestroy(&msol_loc);CHKERRQ(ierr); 1220 ierr = VecDestroy(&v_mpi);CHKERRQ(ierr); 1221 if (Bt) { 1222 if (!mumps->myid) { 1223 b = (Mat_MPIAIJ*)Bt->data; 1224 ierr = MatSeqAIJRestoreArray(b->A,&aa);CHKERRQ(ierr); 1225 ierr = MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1226 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 1227 } 1228 } else { 1229 ierr = VecDestroy(&b_seq);CHKERRQ(ierr); 1230 ierr = VecScatterDestroy(&scat_rhs);CHKERRQ(ierr); 1231 } 1232 ierr = VecScatterDestroy(&scat_sol);CHKERRQ(ierr); 1233 ierr = PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));CHKERRQ(ierr); 1234 PetscFunctionReturn(0); 1235 } 1236 1237 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X) 1238 { 1239 PetscErrorCode ierr; 1240 PetscBool flg; 1241 Mat B; 1242 1243 PetscFunctionBegin; 1244 ierr = PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 1245 if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix"); 1246 1247 /* Create B=Bt^T that uses Bt's data structure */ 1248 ierr = MatCreateTranspose(Bt,&B);CHKERRQ(ierr); 1249 1250 ierr = MatMatSolve_MUMPS(A,B,X);CHKERRQ(ierr); 1251 ierr = MatDestroy(&B);CHKERRQ(ierr); 1252 PetscFunctionReturn(0); 1253 } 1254 1255 #if !defined(PETSC_USE_COMPLEX) 1256 /* 1257 input: 1258 F: numeric factor 1259 output: 1260 nneg: total number of negative pivots 1261 nzero: total number of zero pivots 1262 npos: (global dimension of F) - nneg - nzero 1263 */ 1264 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos) 1265 { 1266 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1267 PetscErrorCode ierr; 1268 PetscMPIInt size; 1269 1270 PetscFunctionBegin; 1271 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRQ(ierr); 1272 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1273 if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",mumps->id.INFOG(13)); 1274 1275 if (nneg) *nneg = mumps->id.INFOG(12); 1276 if (nzero || npos) { 1277 if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1278 if (nzero) *nzero = mumps->id.INFOG(28); 1279 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1280 } 1281 PetscFunctionReturn(0); 1282 } 1283 #endif 1284 1285 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps) 1286 { 1287 PetscErrorCode ierr; 1288 PetscInt i,nz=0,*irn,*jcn=0; 1289 PetscScalar *val=0; 1290 PetscMPIInt mpinz,*recvcount=NULL,*displs=NULL; 1291 1292 PetscFunctionBegin; 1293 if (mumps->omp_comm_size > 1) { 1294 if (reuse == MAT_INITIAL_MATRIX) { 1295 /* master first gathers counts of nonzeros to receive */ 1296 if (mumps->is_omp_master) { ierr = PetscMalloc2(mumps->omp_comm_size,&recvcount,mumps->omp_comm_size,&displs);CHKERRQ(ierr); } 1297 ierr = PetscMPIIntCast(mumps->nz,&mpinz);CHKERRQ(ierr); 1298 ierr = MPI_Gather(&mpinz,1,MPI_INT,recvcount,1,MPI_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1299 1300 /* master allocates memory to receive nonzeros */ 1301 if (mumps->is_omp_master) { 1302 displs[0] = 0; 1303 for (i=1; i<mumps->omp_comm_size; i++) displs[i] = displs[i-1] + recvcount[i-1]; 1304 nz = displs[mumps->omp_comm_size-1] + recvcount[mumps->omp_comm_size-1]; 1305 ierr = PetscMalloc(2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar),&irn);CHKERRQ(ierr); 1306 jcn = irn + nz; 1307 val = (PetscScalar*)(jcn + nz); 1308 } 1309 1310 /* save the gatherv plan */ 1311 mumps->mpinz = mpinz; /* used as send count */ 1312 mumps->recvcount = recvcount; 1313 mumps->displs = displs; 1314 1315 /* master gathers nonzeros */ 1316 ierr = MPI_Gatherv(mumps->irn,mpinz,MPIU_INT,irn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1317 ierr = MPI_Gatherv(mumps->jcn,mpinz,MPIU_INT,jcn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1318 ierr = MPI_Gatherv(mumps->val,mpinz,MPIU_SCALAR,val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1319 1320 /* master frees its row/col/val and replaces them with bigger arrays */ 1321 if (mumps->is_omp_master) { 1322 ierr = PetscFree(mumps->irn);CHKERRQ(ierr); /* irn/jcn/val are allocated together so free only irn */ 1323 mumps->nz = nz; /* it is a sum of mpinz over omp_comm */ 1324 mumps->irn = irn; 1325 mumps->jcn = jcn; 1326 mumps->val = val; 1327 } 1328 } else { 1329 ierr = MPI_Gatherv((mumps->is_omp_master?MPI_IN_PLACE:mumps->val),mumps->mpinz,MPIU_SCALAR,mumps->val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr); 1330 } 1331 } 1332 PetscFunctionReturn(0); 1333 } 1334 1335 PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info) 1336 { 1337 Mat_MUMPS *mumps =(Mat_MUMPS*)(F)->data; 1338 PetscErrorCode ierr; 1339 PetscBool isMPIAIJ; 1340 1341 PetscFunctionBegin; 1342 if (mumps->id.INFOG(1) < 0) { 1343 if (mumps->id.INFOG(1) == -6) { 1344 ierr = PetscInfo2(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1345 } 1346 ierr = PetscInfo2(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1347 PetscFunctionReturn(0); 1348 } 1349 1350 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1351 ierr = MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);CHKERRQ(ierr); 1352 1353 /* numerical factorization phase */ 1354 /*-------------------------------*/ 1355 mumps->id.job = JOB_FACTNUMERIC; 1356 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1357 if (!mumps->myid) { 1358 mumps->id.a = (MumpsScalar*)mumps->val; 1359 } 1360 } else { 1361 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1362 } 1363 PetscMUMPS_c(mumps); 1364 if (mumps->id.INFOG(1) < 0) { 1365 if (A->erroriffailure) { 1366 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 1367 } else { 1368 if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */ 1369 ierr = PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1370 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1371 } else if (mumps->id.INFOG(1) == -13) { 1372 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1373 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1374 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10) ) { 1375 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray \n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1376 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1377 } else { 1378 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1379 F->factorerrortype = MAT_FACTOR_OTHER; 1380 } 1381 } 1382 } 1383 if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d\n",mumps->id.INFOG(16)); 1384 1385 F->assembled = PETSC_TRUE; 1386 mumps->matstruc = SAME_NONZERO_PATTERN; 1387 if (F->schur) { /* reset Schur status to unfactored */ 1388 #if defined(PETSC_HAVE_CUDA) 1389 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 1390 #endif 1391 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1392 mumps->id.ICNTL(19) = 2; 1393 ierr = MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);CHKERRQ(ierr); 1394 } 1395 ierr = MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);CHKERRQ(ierr); 1396 } 1397 1398 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 1399 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 1400 1401 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 1402 if (mumps->petsc_size > 1) { 1403 PetscInt lsol_loc; 1404 PetscScalar *sol_loc; 1405 1406 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr); 1407 1408 /* distributed solution; Create x_seq=sol_loc for repeated use */ 1409 if (mumps->x_seq) { 1410 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 1411 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 1412 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 1413 } 1414 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1415 ierr = PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);CHKERRQ(ierr); 1416 mumps->id.lsol_loc = lsol_loc; 1417 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1418 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr); 1419 } 1420 ierr = PetscLogFlops(mumps->id.RINFO(2));CHKERRQ(ierr); 1421 PetscFunctionReturn(0); 1422 } 1423 1424 /* Sets MUMPS options from the options database */ 1425 PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A) 1426 { 1427 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1428 PetscErrorCode ierr; 1429 PetscInt icntl,info[80],i,ninfo=80; 1430 PetscBool flg; 1431 1432 PetscFunctionBegin; 1433 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 1434 ierr = PetscOptionsInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr); 1435 if (flg) mumps->id.ICNTL(1) = icntl; 1436 ierr = PetscOptionsInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr); 1437 if (flg) mumps->id.ICNTL(2) = icntl; 1438 ierr = PetscOptionsInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr); 1439 if (flg) mumps->id.ICNTL(3) = icntl; 1440 1441 ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr); 1442 if (flg) mumps->id.ICNTL(4) = icntl; 1443 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 1444 1445 ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr); 1446 if (flg) mumps->id.ICNTL(6) = icntl; 1447 1448 ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr); 1449 if (flg) { 1450 if (icntl== 1 && mumps->petsc_size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n"); 1451 else mumps->id.ICNTL(7) = icntl; 1452 } 1453 1454 ierr = PetscOptionsInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr); 1455 /* ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL);CHKERRQ(ierr); handled by MatSolveTranspose_MUMPS() */ 1456 ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr); 1457 ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr); 1458 ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr); 1459 ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr); 1460 ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr); 1461 ierr = PetscOptionsInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr); 1462 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 1463 ierr = MatDestroy(&F->schur);CHKERRQ(ierr); 1464 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 1465 } 1466 /* ierr = PetscOptionsInt("-mat_mumps_icntl_20","ICNTL(20): the format (dense or sparse) of the right-hand sides","None",mumps->id.ICNTL(20),&mumps->id.ICNTL(20),NULL);CHKERRQ(ierr); -- sparse rhs is not supported in PETSc API */ 1467 /* ierr = PetscOptionsInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL);CHKERRQ(ierr); we only use distributed solution vector */ 1468 1469 ierr = PetscOptionsInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr); 1470 ierr = PetscOptionsInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr); 1471 ierr = PetscOptionsInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr); 1472 if (mumps->id.ICNTL(24)) { 1473 mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ 1474 } 1475 1476 ierr = PetscOptionsInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr); 1477 ierr = PetscOptionsInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr); 1478 ierr = PetscOptionsInt("-mat_mumps_icntl_27","ICNTL(27): the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr); 1479 ierr = PetscOptionsInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr); 1480 ierr = PetscOptionsInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr); 1481 /* ierr = PetscOptionsInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */ 1482 ierr = PetscOptionsInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr); 1483 /* ierr = PetscOptionsInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr); -- not supported by PETSc API */ 1484 ierr = PetscOptionsInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr); 1485 ierr = PetscOptionsInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr); 1486 ierr = PetscOptionsInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);CHKERRQ(ierr); 1487 ierr = PetscOptionsInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);CHKERRQ(ierr); 1488 1489 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr); 1490 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr); 1491 ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr); 1492 ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr); 1493 ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr); 1494 ierr = PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);CHKERRQ(ierr); 1495 1496 ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, 256, NULL);CHKERRQ(ierr); 1497 1498 ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr); 1499 if (ninfo) { 1500 if (ninfo > 80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %d must <= 80\n",ninfo); 1501 ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr); 1502 mumps->ninfo = ninfo; 1503 for (i=0; i<ninfo; i++) { 1504 if (info[i] < 0 || info[i]>80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %d must between 1 and 80\n",ninfo); 1505 else mumps->info[i] = info[i]; 1506 } 1507 } 1508 1509 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1510 PetscFunctionReturn(0); 1511 } 1512 1513 PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps) 1514 { 1515 PetscErrorCode ierr; 1516 PetscInt nthreads=0; 1517 1518 PetscFunctionBegin; 1519 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1520 ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRQ(ierr); 1521 ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRQ(ierr); /* so that code like "if (!myid)" still works even if mumps_comm is different */ 1522 1523 ierr = PetscOptionsHasName(NULL,NULL,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr); 1524 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1525 ierr = PetscOptionsGetInt(NULL,NULL,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr); 1526 if (mumps->use_petsc_omp_support) { 1527 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1528 ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr); 1529 ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr); 1530 #else 1531 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -mat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual\n"); 1532 #endif 1533 } else { 1534 mumps->omp_comm = PETSC_COMM_SELF; 1535 mumps->mumps_comm = mumps->petsc_comm; 1536 mumps->is_omp_master = PETSC_TRUE; 1537 } 1538 ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRQ(ierr); 1539 1540 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 1541 mumps->id.job = JOB_INIT; 1542 mumps->id.par = 1; /* host participates factorizaton and solve */ 1543 mumps->id.sym = mumps->sym; 1544 1545 PetscMUMPS_c(mumps); 1546 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in PetscInitializeMUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1547 1548 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 1549 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 1550 */ 1551 ierr = MPI_Bcast(mumps->id.icntl,60,MPIU_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 Manual Section 9 */ 1552 ierr = MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); 1553 1554 mumps->scat_rhs = NULL; 1555 mumps->scat_sol = NULL; 1556 1557 /* set PETSc-MUMPS default options - override MUMPS default */ 1558 mumps->id.ICNTL(3) = 0; 1559 mumps->id.ICNTL(4) = 0; 1560 if (mumps->petsc_size == 1) { 1561 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 1562 } else { 1563 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 1564 mumps->id.ICNTL(20) = 0; /* rhs is in dense format */ 1565 mumps->id.ICNTL(21) = 1; /* distributed solution */ 1566 } 1567 1568 /* schur */ 1569 mumps->id.size_schur = 0; 1570 mumps->id.listvar_schur = NULL; 1571 mumps->id.schur = NULL; 1572 mumps->sizeredrhs = 0; 1573 mumps->schur_sol = NULL; 1574 mumps->schur_sizesol = 0; 1575 PetscFunctionReturn(0); 1576 } 1577 1578 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps) 1579 { 1580 PetscErrorCode ierr; 1581 1582 PetscFunctionBegin; 1583 ierr = MPI_Bcast(mumps->id.infog, 80,MPIU_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 manual p82 */ 1584 ierr = MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); 1585 if (mumps->id.INFOG(1) < 0) { 1586 if (A->erroriffailure) { 1587 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1588 } else { 1589 if (mumps->id.INFOG(1) == -6) { 1590 ierr = PetscInfo2(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1591 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 1592 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 1593 ierr = PetscInfo2(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1594 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1595 } else { 1596 ierr = PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1597 F->factorerrortype = MAT_FACTOR_OTHER; 1598 } 1599 } 1600 } 1601 PetscFunctionReturn(0); 1602 } 1603 1604 /* Note Petsc r(=c) permutation is used when mumps->id.ICNTL(7)==1 with centralized assembled matrix input; otherwise r and c are ignored */ 1605 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1606 { 1607 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1608 PetscErrorCode ierr; 1609 Vec b; 1610 const PetscInt M = A->rmap->N; 1611 1612 PetscFunctionBegin; 1613 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1614 1615 /* Set MUMPS options from the options database */ 1616 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1617 1618 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1619 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1620 1621 /* analysis phase */ 1622 /*----------------*/ 1623 mumps->id.job = JOB_FACTSYMBOLIC; 1624 mumps->id.n = M; 1625 switch (mumps->id.ICNTL(18)) { 1626 case 0: /* centralized assembled matrix input */ 1627 if (!mumps->myid) { 1628 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1629 if (mumps->id.ICNTL(6)>1) { 1630 mumps->id.a = (MumpsScalar*)mumps->val; 1631 } 1632 if (mumps->id.ICNTL(7) == 1) { /* use user-provide matrix ordering - assuming r = c ordering */ 1633 /* 1634 PetscBool flag; 1635 ierr = ISEqual(r,c,&flag);CHKERRQ(ierr); 1636 if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"row_perm != col_perm"); 1637 ierr = ISView(r,PETSC_VIEWER_STDOUT_SELF); 1638 */ 1639 if (!mumps->myid) { 1640 const PetscInt *idx; 1641 PetscInt i,*perm_in; 1642 1643 ierr = PetscMalloc1(M,&perm_in);CHKERRQ(ierr); 1644 ierr = ISGetIndices(r,&idx);CHKERRQ(ierr); 1645 1646 mumps->id.perm_in = perm_in; 1647 for (i=0; i<M; i++) perm_in[i] = idx[i]+1; /* perm_in[]: start from 1, not 0! */ 1648 ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr); 1649 } 1650 } 1651 } 1652 break; 1653 case 3: /* distributed assembled matrix input (size>1) */ 1654 mumps->id.nz_loc = mumps->nz; 1655 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1656 if (mumps->id.ICNTL(6)>1) { 1657 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1658 } 1659 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1660 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1661 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1662 ierr = VecDestroy(&b);CHKERRQ(ierr); 1663 break; 1664 } 1665 PetscMUMPS_c(mumps); 1666 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1667 1668 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1669 F->ops->solve = MatSolve_MUMPS; 1670 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1671 F->ops->matsolve = MatMatSolve_MUMPS; 1672 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1673 PetscFunctionReturn(0); 1674 } 1675 1676 /* Note the Petsc r and c permutations are ignored */ 1677 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1678 { 1679 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1680 PetscErrorCode ierr; 1681 Vec b; 1682 const PetscInt M = A->rmap->N; 1683 1684 PetscFunctionBegin; 1685 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1686 1687 /* Set MUMPS options from the options database */ 1688 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1689 1690 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1691 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1692 1693 /* analysis phase */ 1694 /*----------------*/ 1695 mumps->id.job = JOB_FACTSYMBOLIC; 1696 mumps->id.n = M; 1697 switch (mumps->id.ICNTL(18)) { 1698 case 0: /* centralized assembled matrix input */ 1699 if (!mumps->myid) { 1700 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1701 if (mumps->id.ICNTL(6)>1) { 1702 mumps->id.a = (MumpsScalar*)mumps->val; 1703 } 1704 } 1705 break; 1706 case 3: /* distributed assembled matrix input (size>1) */ 1707 mumps->id.nz_loc = mumps->nz; 1708 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1709 if (mumps->id.ICNTL(6)>1) { 1710 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1711 } 1712 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1713 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1714 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1715 ierr = VecDestroy(&b);CHKERRQ(ierr); 1716 break; 1717 } 1718 PetscMUMPS_c(mumps); 1719 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1720 1721 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1722 F->ops->solve = MatSolve_MUMPS; 1723 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1724 PetscFunctionReturn(0); 1725 } 1726 1727 /* Note the Petsc r permutation and factor info are ignored */ 1728 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info) 1729 { 1730 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1731 PetscErrorCode ierr; 1732 Vec b; 1733 const PetscInt M = A->rmap->N; 1734 1735 PetscFunctionBegin; 1736 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1737 1738 /* Set MUMPS options from the options database */ 1739 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1740 1741 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1742 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1743 1744 /* analysis phase */ 1745 /*----------------*/ 1746 mumps->id.job = JOB_FACTSYMBOLIC; 1747 mumps->id.n = M; 1748 switch (mumps->id.ICNTL(18)) { 1749 case 0: /* centralized assembled matrix input */ 1750 if (!mumps->myid) { 1751 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1752 if (mumps->id.ICNTL(6)>1) { 1753 mumps->id.a = (MumpsScalar*)mumps->val; 1754 } 1755 } 1756 break; 1757 case 3: /* distributed assembled matrix input (size>1) */ 1758 mumps->id.nz_loc = mumps->nz; 1759 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1760 if (mumps->id.ICNTL(6)>1) { 1761 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1762 } 1763 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1764 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1765 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1766 ierr = VecDestroy(&b);CHKERRQ(ierr); 1767 break; 1768 } 1769 PetscMUMPS_c(mumps); 1770 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1771 1772 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 1773 F->ops->solve = MatSolve_MUMPS; 1774 F->ops->solvetranspose = MatSolve_MUMPS; 1775 F->ops->matsolve = MatMatSolve_MUMPS; 1776 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1777 #if defined(PETSC_USE_COMPLEX) 1778 F->ops->getinertia = NULL; 1779 #else 1780 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 1781 #endif 1782 PetscFunctionReturn(0); 1783 } 1784 1785 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) 1786 { 1787 PetscErrorCode ierr; 1788 PetscBool iascii; 1789 PetscViewerFormat format; 1790 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1791 1792 PetscFunctionBegin; 1793 /* check if matrix is mumps type */ 1794 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0); 1795 1796 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1797 if (iascii) { 1798 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1799 if (format == PETSC_VIEWER_ASCII_INFO) { 1800 ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr); 1801 ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",mumps->id.sym);CHKERRQ(ierr); 1802 ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",mumps->id.par);CHKERRQ(ierr); 1803 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d \n",mumps->id.ICNTL(1));CHKERRQ(ierr); 1804 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d \n",mumps->id.ICNTL(2));CHKERRQ(ierr); 1805 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d \n",mumps->id.ICNTL(3));CHKERRQ(ierr); 1806 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",mumps->id.ICNTL(4));CHKERRQ(ierr); 1807 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",mumps->id.ICNTL(5));CHKERRQ(ierr); 1808 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",mumps->id.ICNTL(6));CHKERRQ(ierr); 1809 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d \n",mumps->id.ICNTL(7));CHKERRQ(ierr); 1810 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d \n",mumps->id.ICNTL(8));CHKERRQ(ierr); 1811 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",mumps->id.ICNTL(10));CHKERRQ(ierr); 1812 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",mumps->id.ICNTL(11));CHKERRQ(ierr); 1813 if (mumps->id.ICNTL(11)>0) { 1814 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr); 1815 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr); 1816 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6));CHKERRQ(ierr); 1817 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr); 1818 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr); 1819 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr); 1820 } 1821 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",mumps->id.ICNTL(12));CHKERRQ(ierr); 1822 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (efficiency control): %d \n",mumps->id.ICNTL(13));CHKERRQ(ierr); 1823 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",mumps->id.ICNTL(14));CHKERRQ(ierr); 1824 /* ICNTL(15-17) not used */ 1825 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",mumps->id.ICNTL(18));CHKERRQ(ierr); 1826 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d \n",mumps->id.ICNTL(19));CHKERRQ(ierr); 1827 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(20) (rhs sparse pattern): %d \n",mumps->id.ICNTL(20));CHKERRQ(ierr); 1828 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d \n",mumps->id.ICNTL(21));CHKERRQ(ierr); 1829 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d \n",mumps->id.ICNTL(22));CHKERRQ(ierr); 1830 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory can be allocated locally):%d \n",mumps->id.ICNTL(23));CHKERRQ(ierr); 1831 1832 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d \n",mumps->id.ICNTL(24));CHKERRQ(ierr); 1833 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d \n",mumps->id.ICNTL(25));CHKERRQ(ierr); 1834 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for rhs or solution): %d \n",mumps->id.ICNTL(26));CHKERRQ(ierr); 1835 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(27) (experimental parameter): %d \n",mumps->id.ICNTL(27));CHKERRQ(ierr); 1836 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d \n",mumps->id.ICNTL(28));CHKERRQ(ierr); 1837 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d \n",mumps->id.ICNTL(29));CHKERRQ(ierr); 1838 1839 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d \n",mumps->id.ICNTL(30));CHKERRQ(ierr); 1840 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(31) (factors is discarded in the solve phase): %d \n",mumps->id.ICNTL(31));CHKERRQ(ierr); 1841 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d \n",mumps->id.ICNTL(33));CHKERRQ(ierr); 1842 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d \n",mumps->id.ICNTL(35));CHKERRQ(ierr); 1843 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(36) (choice of BLR factorization variant): %d \n",mumps->id.ICNTL(36));CHKERRQ(ierr); 1844 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(38) (estimated compression rate of LU factors): %d \n",mumps->id.ICNTL(38));CHKERRQ(ierr); 1845 1846 ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1));CHKERRQ(ierr); 1847 ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr); 1848 ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3));CHKERRQ(ierr); 1849 ierr = PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4));CHKERRQ(ierr); 1850 ierr = PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5));CHKERRQ(ierr); 1851 ierr = PetscViewerASCIIPrintf(viewer," CNTL(7) (dropping parameter for BLR): %g \n",mumps->id.CNTL(7));CHKERRQ(ierr); 1852 1853 /* infomation local to each processor */ 1854 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr); 1855 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1856 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr); 1857 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1858 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr); 1859 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr); 1860 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1861 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr); 1862 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr); 1863 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1864 1865 ierr = PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr); 1866 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr); 1867 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1868 1869 ierr = PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr); 1870 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr); 1871 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1872 1873 ierr = PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr); 1874 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr); 1875 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1876 1877 if (mumps->ninfo && mumps->ninfo <= 80){ 1878 PetscInt i; 1879 for (i=0; i<mumps->ninfo; i++){ 1880 ierr = PetscViewerASCIIPrintf(viewer, " INFO(%d): \n",mumps->info[i]);CHKERRQ(ierr); 1881 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(mumps->info[i]));CHKERRQ(ierr); 1882 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1883 } 1884 } 1885 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1886 1887 if (!mumps->myid) { /* information from the host */ 1888 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr); 1889 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr); 1890 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr); 1891 ierr = PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr); 1892 1893 ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(3));CHKERRQ(ierr); 1894 ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(4));CHKERRQ(ierr); 1895 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",mumps->id.INFOG(5));CHKERRQ(ierr); 1896 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",mumps->id.INFOG(6));CHKERRQ(ierr); 1897 ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively use after analysis): %d \n",mumps->id.INFOG(7));CHKERRQ(ierr); 1898 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",mumps->id.INFOG(8));CHKERRQ(ierr); 1899 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",mumps->id.INFOG(9));CHKERRQ(ierr); 1900 ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d \n",mumps->id.INFOG(10));CHKERRQ(ierr); 1901 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d \n",mumps->id.INFOG(11));CHKERRQ(ierr); 1902 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",mumps->id.INFOG(12));CHKERRQ(ierr); 1903 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",mumps->id.INFOG(13));CHKERRQ(ierr); 1904 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",mumps->id.INFOG(14));CHKERRQ(ierr); 1905 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",mumps->id.INFOG(15));CHKERRQ(ierr); 1906 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",mumps->id.INFOG(16));CHKERRQ(ierr); 1907 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",mumps->id.INFOG(17));CHKERRQ(ierr); 1908 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",mumps->id.INFOG(18));CHKERRQ(ierr); 1909 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",mumps->id.INFOG(19));CHKERRQ(ierr); 1910 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",mumps->id.INFOG(20));CHKERRQ(ierr); 1911 ierr = PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",mumps->id.INFOG(21));CHKERRQ(ierr); 1912 ierr = PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",mumps->id.INFOG(22));CHKERRQ(ierr); 1913 ierr = PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",mumps->id.INFOG(23));CHKERRQ(ierr); 1914 ierr = PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",mumps->id.INFOG(24));CHKERRQ(ierr); 1915 ierr = PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",mumps->id.INFOG(25));CHKERRQ(ierr); 1916 ierr = PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));CHKERRQ(ierr); 1917 ierr = PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));CHKERRQ(ierr); 1918 ierr = PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));CHKERRQ(ierr); 1919 ierr = PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));CHKERRQ(ierr); 1920 ierr = PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));CHKERRQ(ierr); 1921 ierr = PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));CHKERRQ(ierr); 1922 ierr = PetscViewerASCIIPrintf(viewer," INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));CHKERRQ(ierr); 1923 ierr = PetscViewerASCIIPrintf(viewer," INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(36));CHKERRQ(ierr); 1924 ierr = PetscViewerASCIIPrintf(viewer," INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d \n",mumps->id.INFOG(37));CHKERRQ(ierr); 1925 ierr = PetscViewerASCIIPrintf(viewer," INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(38));CHKERRQ(ierr); 1926 ierr = PetscViewerASCIIPrintf(viewer," INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d \n",mumps->id.INFOG(39));CHKERRQ(ierr); 1927 } 1928 } 1929 } 1930 PetscFunctionReturn(0); 1931 } 1932 1933 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info) 1934 { 1935 Mat_MUMPS *mumps =(Mat_MUMPS*)A->data; 1936 1937 PetscFunctionBegin; 1938 info->block_size = 1.0; 1939 info->nz_allocated = mumps->id.INFOG(20); 1940 info->nz_used = mumps->id.INFOG(20); 1941 info->nz_unneeded = 0.0; 1942 info->assemblies = 0.0; 1943 info->mallocs = 0.0; 1944 info->memory = 0.0; 1945 info->fill_ratio_given = 0; 1946 info->fill_ratio_needed = 0; 1947 info->factor_mallocs = 0; 1948 PetscFunctionReturn(0); 1949 } 1950 1951 /* -------------------------------------------------------------------------------------------*/ 1952 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 1953 { 1954 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1955 const PetscScalar *arr; 1956 const PetscInt *idxs; 1957 PetscInt size,i; 1958 PetscErrorCode ierr; 1959 1960 PetscFunctionBegin; 1961 ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr); 1962 if (mumps->petsc_size > 1) { 1963 PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */ 1964 1965 ls = mumps->myid ? (size ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 1966 ierr = MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);CHKERRQ(ierr); 1967 if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc\n"); 1968 } 1969 1970 /* Schur complement matrix */ 1971 ierr = MatDestroy(&F->schur);CHKERRQ(ierr); 1972 ierr = MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur);CHKERRQ(ierr); 1973 ierr = MatDenseGetArrayRead(F->schur,&arr);CHKERRQ(ierr); 1974 mumps->id.schur = (MumpsScalar*)arr; 1975 mumps->id.size_schur = size; 1976 mumps->id.schur_lld = size; 1977 ierr = MatDenseRestoreArrayRead(F->schur,&arr);CHKERRQ(ierr); 1978 if (mumps->sym == 1) { 1979 ierr = MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 1980 } 1981 1982 /* MUMPS expects Fortran style indices */ 1983 ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr); 1984 ierr = PetscMalloc1(size,&mumps->id.listvar_schur);CHKERRQ(ierr); 1985 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 1986 ierr = PetscArraycpy(mumps->id.listvar_schur,idxs,size);CHKERRQ(ierr); 1987 for (i=0;i<size;i++) mumps->id.listvar_schur[i]++; 1988 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 1989 if (mumps->petsc_size > 1) { 1990 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 1991 } else { 1992 if (F->factortype == MAT_FACTOR_LU) { 1993 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 1994 } else { 1995 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 1996 } 1997 } 1998 /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 1999 mumps->id.ICNTL(26) = -1; 2000 PetscFunctionReturn(0); 2001 } 2002 2003 /* -------------------------------------------------------------------------------------------*/ 2004 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S) 2005 { 2006 Mat St; 2007 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2008 PetscScalar *array; 2009 #if defined(PETSC_USE_COMPLEX) 2010 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 2011 #endif 2012 PetscErrorCode ierr; 2013 2014 PetscFunctionBegin; 2015 if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it"); 2016 ierr = MatCreate(PETSC_COMM_SELF,&St);CHKERRQ(ierr); 2017 ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);CHKERRQ(ierr); 2018 ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr); 2019 ierr = MatSetUp(St);CHKERRQ(ierr); 2020 ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr); 2021 if (!mumps->sym) { /* MUMPS always return a full matrix */ 2022 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2023 PetscInt i,j,N=mumps->id.size_schur; 2024 for (i=0;i<N;i++) { 2025 for (j=0;j<N;j++) { 2026 #if !defined(PETSC_USE_COMPLEX) 2027 PetscScalar val = mumps->id.schur[i*N+j]; 2028 #else 2029 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2030 #endif 2031 array[j*N+i] = val; 2032 } 2033 } 2034 } else { /* stored by columns */ 2035 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 2036 } 2037 } else { /* either full or lower-triangular (not packed) */ 2038 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2039 PetscInt i,j,N=mumps->id.size_schur; 2040 for (i=0;i<N;i++) { 2041 for (j=i;j<N;j++) { 2042 #if !defined(PETSC_USE_COMPLEX) 2043 PetscScalar val = mumps->id.schur[i*N+j]; 2044 #else 2045 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2046 #endif 2047 array[i*N+j] = val; 2048 array[j*N+i] = val; 2049 } 2050 } 2051 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2052 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 2053 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2054 PetscInt i,j,N=mumps->id.size_schur; 2055 for (i=0;i<N;i++) { 2056 for (j=0;j<i+1;j++) { 2057 #if !defined(PETSC_USE_COMPLEX) 2058 PetscScalar val = mumps->id.schur[i*N+j]; 2059 #else 2060 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2061 #endif 2062 array[i*N+j] = val; 2063 array[j*N+i] = val; 2064 } 2065 } 2066 } 2067 } 2068 ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr); 2069 *S = St; 2070 PetscFunctionReturn(0); 2071 } 2072 2073 /* -------------------------------------------------------------------------------------------*/ 2074 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival) 2075 { 2076 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2077 2078 PetscFunctionBegin; 2079 mumps->id.ICNTL(icntl) = ival; 2080 PetscFunctionReturn(0); 2081 } 2082 2083 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival) 2084 { 2085 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2086 2087 PetscFunctionBegin; 2088 *ival = mumps->id.ICNTL(icntl); 2089 PetscFunctionReturn(0); 2090 } 2091 2092 /*@ 2093 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2094 2095 Logically Collective on Mat 2096 2097 Input Parameters: 2098 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2099 . icntl - index of MUMPS parameter array ICNTL() 2100 - ival - value of MUMPS ICNTL(icntl) 2101 2102 Options Database: 2103 . -mat_mumps_icntl_<icntl> <ival> 2104 2105 Level: beginner 2106 2107 References: 2108 . MUMPS Users' Guide 2109 2110 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2111 @*/ 2112 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival) 2113 { 2114 PetscErrorCode ierr; 2115 2116 PetscFunctionBegin; 2117 PetscValidType(F,1); 2118 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2119 PetscValidLogicalCollectiveInt(F,icntl,2); 2120 PetscValidLogicalCollectiveInt(F,ival,3); 2121 ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr); 2122 PetscFunctionReturn(0); 2123 } 2124 2125 /*@ 2126 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2127 2128 Logically Collective on Mat 2129 2130 Input Parameters: 2131 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2132 - icntl - index of MUMPS parameter array ICNTL() 2133 2134 Output Parameter: 2135 . ival - value of MUMPS ICNTL(icntl) 2136 2137 Level: beginner 2138 2139 References: 2140 . MUMPS Users' Guide 2141 2142 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2143 @*/ 2144 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival) 2145 { 2146 PetscErrorCode ierr; 2147 2148 PetscFunctionBegin; 2149 PetscValidType(F,1); 2150 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2151 PetscValidLogicalCollectiveInt(F,icntl,2); 2152 PetscValidIntPointer(ival,3); 2153 ierr = PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2154 PetscFunctionReturn(0); 2155 } 2156 2157 /* -------------------------------------------------------------------------------------------*/ 2158 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val) 2159 { 2160 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2161 2162 PetscFunctionBegin; 2163 mumps->id.CNTL(icntl) = val; 2164 PetscFunctionReturn(0); 2165 } 2166 2167 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val) 2168 { 2169 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2170 2171 PetscFunctionBegin; 2172 *val = mumps->id.CNTL(icntl); 2173 PetscFunctionReturn(0); 2174 } 2175 2176 /*@ 2177 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2178 2179 Logically Collective on Mat 2180 2181 Input Parameters: 2182 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2183 . icntl - index of MUMPS parameter array CNTL() 2184 - val - value of MUMPS CNTL(icntl) 2185 2186 Options Database: 2187 . -mat_mumps_cntl_<icntl> <val> 2188 2189 Level: beginner 2190 2191 References: 2192 . MUMPS Users' Guide 2193 2194 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2195 @*/ 2196 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val) 2197 { 2198 PetscErrorCode ierr; 2199 2200 PetscFunctionBegin; 2201 PetscValidType(F,1); 2202 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2203 PetscValidLogicalCollectiveInt(F,icntl,2); 2204 PetscValidLogicalCollectiveReal(F,val,3); 2205 ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr); 2206 PetscFunctionReturn(0); 2207 } 2208 2209 /*@ 2210 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2211 2212 Logically Collective on Mat 2213 2214 Input Parameters: 2215 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2216 - icntl - index of MUMPS parameter array CNTL() 2217 2218 Output Parameter: 2219 . val - value of MUMPS CNTL(icntl) 2220 2221 Level: beginner 2222 2223 References: 2224 . MUMPS Users' Guide 2225 2226 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2227 @*/ 2228 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val) 2229 { 2230 PetscErrorCode ierr; 2231 2232 PetscFunctionBegin; 2233 PetscValidType(F,1); 2234 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2235 PetscValidLogicalCollectiveInt(F,icntl,2); 2236 PetscValidRealPointer(val,3); 2237 ierr = PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2238 PetscFunctionReturn(0); 2239 } 2240 2241 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info) 2242 { 2243 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2244 2245 PetscFunctionBegin; 2246 *info = mumps->id.INFO(icntl); 2247 PetscFunctionReturn(0); 2248 } 2249 2250 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog) 2251 { 2252 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2253 2254 PetscFunctionBegin; 2255 *infog = mumps->id.INFOG(icntl); 2256 PetscFunctionReturn(0); 2257 } 2258 2259 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo) 2260 { 2261 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2262 2263 PetscFunctionBegin; 2264 *rinfo = mumps->id.RINFO(icntl); 2265 PetscFunctionReturn(0); 2266 } 2267 2268 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog) 2269 { 2270 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2271 2272 PetscFunctionBegin; 2273 *rinfog = mumps->id.RINFOG(icntl); 2274 PetscFunctionReturn(0); 2275 } 2276 2277 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS) 2278 { 2279 PetscErrorCode ierr; 2280 Mat Bt = NULL,Btseq = NULL; 2281 PetscBool flg; 2282 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2283 PetscScalar *aa; 2284 PetscInt spnr,*ia,*ja; 2285 2286 PetscFunctionBegin; 2287 PetscValidIntPointer(spRHS,2); 2288 ierr = PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);CHKERRQ(ierr); 2289 if (flg) { 2290 ierr = MatTransposeGetMat(spRHS,&Bt);CHKERRQ(ierr); 2291 } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be type MATTRANSPOSEMAT matrix"); 2292 2293 ierr = MatMumpsSetIcntl(F,30,1);CHKERRQ(ierr); 2294 2295 if (mumps->petsc_size > 1) { 2296 Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data; 2297 Btseq = b->A; 2298 } else { 2299 Btseq = Bt; 2300 } 2301 2302 if (!mumps->myid) { 2303 ierr = MatSeqAIJGetArray(Btseq,&aa);CHKERRQ(ierr); 2304 ierr = MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2305 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2306 2307 mumps->id.irhs_ptr = ia; 2308 mumps->id.irhs_sparse = ja; 2309 mumps->id.nz_rhs = ia[spnr] - 1; 2310 mumps->id.rhs_sparse = (MumpsScalar*)aa; 2311 } else { 2312 mumps->id.irhs_ptr = NULL; 2313 mumps->id.irhs_sparse = NULL; 2314 mumps->id.nz_rhs = 0; 2315 mumps->id.rhs_sparse = NULL; 2316 } 2317 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2318 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2319 2320 /* solve phase */ 2321 /*-------------*/ 2322 mumps->id.job = JOB_SOLVE; 2323 PetscMUMPS_c(mumps); 2324 if (mumps->id.INFOG(1) < 0) 2325 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 2326 2327 if (!mumps->myid) { 2328 ierr = MatSeqAIJRestoreArray(Btseq,&aa);CHKERRQ(ierr); 2329 ierr = MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2330 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2331 } 2332 PetscFunctionReturn(0); 2333 } 2334 2335 /*@ 2336 MatMumpsGetInverse - Get user-specified set of entries in inverse of A 2337 2338 Logically Collective on Mat 2339 2340 Input Parameters: 2341 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2342 - spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0] 2343 2344 Output Parameter: 2345 . spRHS - requested entries of inverse of A 2346 2347 Level: beginner 2348 2349 References: 2350 . MUMPS Users' Guide 2351 2352 .seealso: MatGetFactor(), MatCreateTranspose() 2353 @*/ 2354 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS) 2355 { 2356 PetscErrorCode ierr; 2357 2358 PetscFunctionBegin; 2359 PetscValidType(F,1); 2360 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2361 ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr); 2362 PetscFunctionReturn(0); 2363 } 2364 2365 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST) 2366 { 2367 PetscErrorCode ierr; 2368 Mat spRHS; 2369 2370 PetscFunctionBegin; 2371 ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr); 2372 ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr); 2373 ierr = MatDestroy(&spRHS);CHKERRQ(ierr); 2374 PetscFunctionReturn(0); 2375 } 2376 2377 /*@ 2378 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T 2379 2380 Logically Collective on Mat 2381 2382 Input Parameters: 2383 + F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface 2384 - spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0] 2385 2386 Output Parameter: 2387 . spRHST - requested entries of inverse of A^T 2388 2389 Level: beginner 2390 2391 References: 2392 . MUMPS Users' Guide 2393 2394 .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse() 2395 @*/ 2396 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST) 2397 { 2398 PetscErrorCode ierr; 2399 PetscBool flg; 2400 2401 PetscFunctionBegin; 2402 PetscValidType(F,1); 2403 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2404 ierr = PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 2405 if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix"); 2406 2407 ierr = PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));CHKERRQ(ierr); 2408 PetscFunctionReturn(0); 2409 } 2410 2411 /*@ 2412 MatMumpsGetInfo - Get MUMPS parameter INFO() 2413 2414 Logically Collective on Mat 2415 2416 Input Parameters: 2417 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2418 - icntl - index of MUMPS parameter array INFO() 2419 2420 Output Parameter: 2421 . ival - value of MUMPS INFO(icntl) 2422 2423 Level: beginner 2424 2425 References: 2426 . MUMPS Users' Guide 2427 2428 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2429 @*/ 2430 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival) 2431 { 2432 PetscErrorCode ierr; 2433 2434 PetscFunctionBegin; 2435 PetscValidType(F,1); 2436 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2437 PetscValidIntPointer(ival,3); 2438 ierr = PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2439 PetscFunctionReturn(0); 2440 } 2441 2442 /*@ 2443 MatMumpsGetInfog - Get MUMPS parameter INFOG() 2444 2445 Logically Collective on Mat 2446 2447 Input Parameters: 2448 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2449 - icntl - index of MUMPS parameter array INFOG() 2450 2451 Output Parameter: 2452 . ival - value of MUMPS INFOG(icntl) 2453 2454 Level: beginner 2455 2456 References: 2457 . MUMPS Users' Guide 2458 2459 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2460 @*/ 2461 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival) 2462 { 2463 PetscErrorCode ierr; 2464 2465 PetscFunctionBegin; 2466 PetscValidType(F,1); 2467 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2468 PetscValidIntPointer(ival,3); 2469 ierr = PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2470 PetscFunctionReturn(0); 2471 } 2472 2473 /*@ 2474 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 2475 2476 Logically Collective on Mat 2477 2478 Input Parameters: 2479 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2480 - icntl - index of MUMPS parameter array RINFO() 2481 2482 Output Parameter: 2483 . val - value of MUMPS RINFO(icntl) 2484 2485 Level: beginner 2486 2487 References: 2488 . MUMPS Users' Guide 2489 2490 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2491 @*/ 2492 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val) 2493 { 2494 PetscErrorCode ierr; 2495 2496 PetscFunctionBegin; 2497 PetscValidType(F,1); 2498 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2499 PetscValidRealPointer(val,3); 2500 ierr = PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2501 PetscFunctionReturn(0); 2502 } 2503 2504 /*@ 2505 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 2506 2507 Logically Collective on Mat 2508 2509 Input Parameters: 2510 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2511 - icntl - index of MUMPS parameter array RINFOG() 2512 2513 Output Parameter: 2514 . val - value of MUMPS RINFOG(icntl) 2515 2516 Level: beginner 2517 2518 References: 2519 . MUMPS Users' Guide 2520 2521 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2522 @*/ 2523 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val) 2524 { 2525 PetscErrorCode ierr; 2526 2527 PetscFunctionBegin; 2528 PetscValidType(F,1); 2529 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2530 PetscValidRealPointer(val,3); 2531 ierr = PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2532 PetscFunctionReturn(0); 2533 } 2534 2535 /*MC 2536 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 2537 distributed and sequential matrices via the external package MUMPS. 2538 2539 Works with MATAIJ and MATSBAIJ matrices 2540 2541 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 2542 2543 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below. 2544 2545 Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver 2546 2547 Options Database Keys: 2548 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 2549 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 2550 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 2551 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 2552 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 2553 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis 2554 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 2555 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 2556 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 2557 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 2558 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 2559 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 2560 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 2561 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 2562 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can allocate per processor 2563 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 2564 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 2565 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix 2566 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering 2567 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 2568 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 2569 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 2570 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 2571 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 2572 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 2573 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 2574 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 2575 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 2576 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 2577 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 2578 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 2579 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 2580 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 2581 Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 2582 2583 Level: beginner 2584 2585 Notes: 2586 MUMPS Cholesky does not handle (complex) Hermitian matrices http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf so using it will error if the matrix is Hermitian. 2587 2588 When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling 2589 $ KSPGetPC(ksp,&pc); 2590 $ PCFactorGetMatrix(pc,&mat); 2591 $ MatMumpsGetInfo(mat,....); 2592 $ MatMumpsGetInfog(mat,....); etc. 2593 Or you can run with -ksp_error_if_not_converged and the program will be stopped and the information printed in the error message. 2594 2595 Two modes to run MUMPS/PETSc with OpenMP 2596 2597 $ Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 2598 $ threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test". 2599 2600 $ -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example, 2601 $ if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 2602 2603 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 2604 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc 2605 (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 2606 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 2607 (PETSc will automatically try to utilized a threaded BLAS if --with-openmp is provided). 2608 2609 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 2610 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 2611 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 2612 are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set 2613 by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs. 2614 In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets, 2615 if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind 2616 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half 2617 cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 2618 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding. 2619 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and 2620 examine the mapping result. 2621 2622 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts, 2623 for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc 2624 calls omp_set_num_threads(m) internally before calling MUMPS. 2625 2626 References: 2627 + 1. - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011). 2628 - 2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017. 2629 2630 .seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCGetFactor(), PCFactorGetMatrix() 2631 2632 M*/ 2633 2634 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type) 2635 { 2636 PetscFunctionBegin; 2637 *type = MATSOLVERMUMPS; 2638 PetscFunctionReturn(0); 2639 } 2640 2641 /* MatGetFactor for Seq and MPI AIJ matrices */ 2642 static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F) 2643 { 2644 Mat B; 2645 PetscErrorCode ierr; 2646 Mat_MUMPS *mumps; 2647 PetscBool isSeqAIJ; 2648 2649 PetscFunctionBegin; 2650 /* Create the factorization matrix */ 2651 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr); 2652 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2653 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2654 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2655 ierr = MatSetUp(B);CHKERRQ(ierr); 2656 2657 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2658 2659 B->ops->view = MatView_MUMPS; 2660 B->ops->getinfo = MatGetInfo_MUMPS; 2661 2662 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2663 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2664 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2665 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2666 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2667 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2668 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2669 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2670 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2671 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2672 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2673 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2674 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2675 2676 if (ftype == MAT_FACTOR_LU) { 2677 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 2678 B->factortype = MAT_FACTOR_LU; 2679 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 2680 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 2681 mumps->sym = 0; 2682 } else { 2683 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 2684 B->factortype = MAT_FACTOR_CHOLESKY; 2685 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 2686 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 2687 #if defined(PETSC_USE_COMPLEX) 2688 mumps->sym = 2; 2689 #else 2690 if (A->spd_set && A->spd) mumps->sym = 1; 2691 else mumps->sym = 2; 2692 #endif 2693 } 2694 2695 /* set solvertype */ 2696 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2697 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2698 2699 B->ops->destroy = MatDestroy_MUMPS; 2700 B->data = (void*)mumps; 2701 2702 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2703 2704 *F = B; 2705 PetscFunctionReturn(0); 2706 } 2707 2708 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 2709 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F) 2710 { 2711 Mat B; 2712 PetscErrorCode ierr; 2713 Mat_MUMPS *mumps; 2714 PetscBool isSeqSBAIJ; 2715 2716 PetscFunctionBegin; 2717 if (ftype != MAT_FACTOR_CHOLESKY) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Cannot use PETSc SBAIJ matrices with MUMPS LU, use AIJ matrix"); 2718 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr); 2719 /* Create the factorization matrix */ 2720 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2721 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2722 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2723 ierr = MatSetUp(B);CHKERRQ(ierr); 2724 2725 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2726 if (isSeqSBAIJ) { 2727 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 2728 } else { 2729 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 2730 } 2731 2732 B->ops->getinfo = MatGetInfo_External; 2733 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 2734 B->ops->view = MatView_MUMPS; 2735 2736 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2737 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2738 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2739 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2740 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2741 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2742 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2743 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2744 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2745 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2746 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2747 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2748 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2749 2750 B->factortype = MAT_FACTOR_CHOLESKY; 2751 #if defined(PETSC_USE_COMPLEX) 2752 mumps->sym = 2; 2753 #else 2754 if (A->spd_set && A->spd) mumps->sym = 1; 2755 else mumps->sym = 2; 2756 #endif 2757 2758 /* set solvertype */ 2759 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2760 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2761 2762 B->ops->destroy = MatDestroy_MUMPS; 2763 B->data = (void*)mumps; 2764 2765 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2766 2767 *F = B; 2768 PetscFunctionReturn(0); 2769 } 2770 2771 static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F) 2772 { 2773 Mat B; 2774 PetscErrorCode ierr; 2775 Mat_MUMPS *mumps; 2776 PetscBool isSeqBAIJ; 2777 2778 PetscFunctionBegin; 2779 /* Create the factorization matrix */ 2780 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr); 2781 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2782 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2783 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2784 ierr = MatSetUp(B);CHKERRQ(ierr); 2785 2786 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2787 if (ftype == MAT_FACTOR_LU) { 2788 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 2789 B->factortype = MAT_FACTOR_LU; 2790 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 2791 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 2792 mumps->sym = 0; 2793 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead\n"); 2794 2795 B->ops->getinfo = MatGetInfo_External; 2796 B->ops->view = MatView_MUMPS; 2797 2798 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2799 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2800 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2801 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2802 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2803 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2804 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2805 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2806 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2807 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2808 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2809 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 2810 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 2811 2812 /* set solvertype */ 2813 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2814 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2815 2816 B->ops->destroy = MatDestroy_MUMPS; 2817 B->data = (void*)mumps; 2818 2819 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2820 2821 *F = B; 2822 PetscFunctionReturn(0); 2823 } 2824 2825 /* MatGetFactor for Seq and MPI SELL matrices */ 2826 static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F) 2827 { 2828 Mat B; 2829 PetscErrorCode ierr; 2830 Mat_MUMPS *mumps; 2831 PetscBool isSeqSELL; 2832 2833 PetscFunctionBegin; 2834 /* Create the factorization matrix */ 2835 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);CHKERRQ(ierr); 2836 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2837 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2838 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 2839 ierr = MatSetUp(B);CHKERRQ(ierr); 2840 2841 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 2842 2843 B->ops->view = MatView_MUMPS; 2844 B->ops->getinfo = MatGetInfo_MUMPS; 2845 2846 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 2847 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 2848 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 2849 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 2850 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 2851 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 2852 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 2853 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 2854 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 2855 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 2856 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 2857 2858 if (ftype == MAT_FACTOR_LU) { 2859 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 2860 B->factortype = MAT_FACTOR_LU; 2861 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 2862 else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 2863 mumps->sym = 0; 2864 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 2865 2866 /* set solvertype */ 2867 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 2868 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 2869 2870 B->ops->destroy = MatDestroy_MUMPS; 2871 B->data = (void*)mumps; 2872 2873 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 2874 2875 *F = B; 2876 PetscFunctionReturn(0); 2877 } 2878 2879 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 2880 { 2881 PetscErrorCode ierr; 2882 2883 PetscFunctionBegin; 2884 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2885 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2886 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2887 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2888 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 2889 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2890 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 2891 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2892 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 2893 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 2894 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);CHKERRQ(ierr); 2895 PetscFunctionReturn(0); 2896 } 2897 2898