/*
    Provides an interface to the MUMPS sparse solver
*/

#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sell/mpi/mpisell.h>

EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#include <cmumps_c.h>
#else
#include <zmumps_c.h>
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#include <smumps_c.h>
#else
#include <dmumps_c.h>
#endif
#endif
EXTERN_C_END
#define JOB_INIT -1
#define JOB_FACTSYMBOLIC 1
#define JOB_FACTNUMERIC 2
#define JOB_SOLVE 3
#define JOB_END -2

/* calls to MUMPS */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c cmumps_c
#else
#define MUMPS_c zmumps_c
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c smumps_c
#else
#define MUMPS_c dmumps_c
#endif
#endif

/* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
#define PetscMUMPS_c(mumps) \
  do { \
    if (mumps->use_petsc_omp_support) { \
      if (mumps->is_omp_master) { \
        ierr = PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl);CHKERRQ(ierr); \
        MUMPS_c(&mumps->id); \
        ierr = PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl);CHKERRQ(ierr); \
      } \
      ierr = PetscOmpCtrlBarrier(mumps->omp_ctrl);CHKERRQ(ierr); \
    } else { \
      MUMPS_c(&mumps->id); \
    } \
  } while(0)
#else
#define PetscMUMPS_c(mumps) \
  do { MUMPS_c(&mumps->id); } while (0)
#endif

/* declare MumpsScalar */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MumpsScalar mumps_complex
#else
#define MumpsScalar mumps_double_complex
#endif
#else
#define MumpsScalar PetscScalar
#endif

/* macros so that indices match MUMPS documentation */
#define ICNTL(I) icntl[(I)-1]
#define CNTL(I) cntl[(I)-1]
#define INFOG(I) infog[(I)-1]
#define INFO(I) info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I) rinfo[(I)-1]
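/* For example, mumps->id.ICNTL(7) expands to mumps->id.icntl[6], so the code below can use the
   same 1-based numbering as the MUMPS users' guide */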
typedef struct {
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;
#else
  ZMUMPS_STRUC_C id;
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
#else
  DMUMPS_STRUC_C id;
#endif
#endif

  MatStructure matstruc;
  PetscMPIInt  myid,petsc_size;
  PetscInt     *irn,*jcn,nz,sym;
  PetscScalar  *val;
  MPI_Comm     mumps_comm;
  PetscInt     ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter   scat_rhs, scat_sol; /* used by MatSolve() */
  Vec          b_seq,x_seq;
  PetscInt     ninfo,*info;        /* display INFO */
  PetscInt     sizeredrhs;
  PetscScalar  *schur_sol;
  PetscInt     schur_sizesol;

  PetscBool    use_petsc_omp_support;
  PetscOmpCtrl omp_ctrl;            /* an OpenMP controller that ensures blocked processes release their CPUs (MPI_Barrier does not have this guarantee) */
  MPI_Comm     petsc_comm,omp_comm; /* petsc_comm is the PETSc matrix's comm */
  PetscMPIInt  mpinz;               /* on the master rank, nz = sum(mpinz) over omp_comm; on other ranks, mpinz = nz */
  PetscMPIInt  omp_comm_size;
  PetscBool    is_omp_master;       /* is this rank the master of omp_comm */
  PetscMPIInt  *recvcount,*displs;

  PetscErrorCode (*ConvertToTriples)(Mat, int, MatReuse, int*, int**, int**, PetscScalar**);
} Mat_MUMPS;

extern PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*);

static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr);
  ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
  ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
  mumps->id.size_schur = 0;
  mumps->id.schur_lld  = 0;
  mumps->id.ICNTL(19)  = 0;
  PetscFunctionReturn(0);
}

/* solve with rhs in mumps->id.redrhs and return in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS            *mumps=(Mat_MUMPS*)F->data;
  Mat                  S,B,X;
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;
  PetscErrorCode       ierr;

  PetscFunctionBegin;
  ierr = MatFactorFactorizeSchurComplement(F);CHKERRQ(ierr);
  ierr = MatFactorGetSchurComplement(F,&S,&schurstatus);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  ierr = MatPinToCPU(B,S->pinnedtocpu);CHKERRQ(ierr);
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);CHKERRQ(ierr);
    ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    ierr = MatPinToCPU(X,S->pinnedtocpu);CHKERRQ(ierr);
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatMatSolveTranspose(S,B,X);CHKERRQ(ierr);
    } else {
      ierr = MatMatSolve(S,B,X);CHKERRQ(ierr);
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    sizesol = mumps->id.nrhs*mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
      ierr = PetscMalloc1(sizesol,&mumps->schur_sol);CHKERRQ(ierr);
      mumps->schur_sizesol = sizesol;
    }
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);CHKERRQ(ierr);
    ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    ierr = MatPinToCPU(X,S->pinnedtocpu);CHKERRQ(ierr);
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatTransposeMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr);
    } else {
      ierr = MatMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr);
    }
    ierr = MatCopy(X,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %D",F->schur_status);
    break;
  }
  ierr = MatFactorRestoreSchurComplement(F,&S,schurstatus);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = MatDestroy(&X);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
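/* Layout note (based on the MUMPS users' guide): id.redrhs is a column-major array with leading
   dimension id.lredrhs >= id.size_schur. The code below allocates lredrhs = size_schur, so the
   reduced right-hand side for column j starts at redrhs[j*lredrhs] */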
static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)F->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
    PetscFunctionReturn(0);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = mumps->id.size_schur;
      ierr = PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);CHKERRQ(ierr);
      mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs;
    }
    mumps->id.ICNTL(26) = 1; /* condensation phase */
  } else { /* prepare for the expansion step */
    /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
    ierr = MatMumpsSolveSchur_Private(F);CHKERRQ(ierr);
    mumps->id.ICNTL(26) = 2; /* expansion phase */
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free MUMPS internal array for redrhs if we have solved for multiple rhs, in order to save memory space */
    if (mumps->id.nrhs > 1) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = 0;
      mumps->sizeredrhs = 0;
    }
  }
  PetscFunctionReturn(0);
}

/*
  MatConvertToTriples_A_B - convert a PETSc matrix to triples: row[nz], col[nz], val[nz]

  input:
    A       - matrix in aij, baij or sbaij format
    shift   - 0: C style output triples; 1: Fortran style output triples
    reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triples
              MAT_REUSE_MATRIX: only the values in the v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and col indices, matrix values (matrix triples)

  The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() they are
  freed with PetscFree(mumps->irn). This is not ideal code: the fact that v is ONLY sometimes part of mumps->irn means
  that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
*/
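/* A small illustration (not part of the interface): with shift=1, the 2x2 SeqAIJ matrix
     [2 3]
     [0 5]
   yields nnz=3 and the Fortran-style triples
     r = {1,1,2}, c = {1,2,2}, v = {2,3,5}
   i.e. the CSR rows flattened in order, with the 1-based indices MUMPS expects */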
PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscScalar *av;
  const PetscInt    *ai,*aj,*ajj,M=A->rmap->n;
  PetscInt          nz,rnz,i,j;
  PetscErrorCode    ierr;
  PetscInt          *row,*col;
  Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;

  PetscFunctionBegin;
  ierr = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr);
  *v   = (PetscScalar*)av;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz;
    ai   = aa->i;
    aj   = aa->j;
    *nnz = nz;
    ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr);
    col  = row + nz;

    nz = 0;
    for (i=0; i<M; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        row[nz] = i+shift; col[nz++] = ajj[j] + shift;
      }
    }
    *r = row; *c = col;
  }
  ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
  PetscInt    *ptr;

  PetscFunctionBegin;
  *v = a->val;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscInt       nz,i,j,row;
    PetscErrorCode ierr;

    nz   = a->sliidx[a->totalslices];
    *nnz = nz;
    ierr = PetscMalloc1(2*nz, &ptr);CHKERRQ(ierr);
    *r   = ptr;
    *c   = ptr + nz;

    for (i=0; i<a->totalslices; i++) {
      for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
        *ptr++ = 8*i + row + shift;
      }
    }
    for (i=0;i<nz;i++) *ptr++ = a->colidx[i] + shift;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  Mat_SeqBAIJ    *aa=(Mat_SeqBAIJ*)A->data;
  const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2;
  PetscInt       bs,M,nz,idx=0,rnz,i,j,k,m;
  PetscErrorCode ierr;
  PetscInt       *row,*col;

  PetscFunctionBegin;
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  M    = A->rmap->N/bs;
  *v   = aa->a;
  if (reuse == MAT_INITIAL_MATRIX) {
    ai   = aa->i; aj = aa->j;
    nz   = bs2*aa->nz;
    *nnz = nz;
    ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr);
    col  = row + nz;

    for (i=0; i<M; i++) {
      ajj = aj + ai[i];
      rnz = ai[i+1] - ai[i];
      for (k=0; k<rnz; k++) {
        for (j=0; j<bs; j++) {
          for (m=0; m<bs; m++) {
            row[idx]   = i*bs + m + shift;
            col[idx++] = bs*(ajj[k]) + j + shift;
          }
        }
      }
    }
    *r = row; *c = col;
  }
  PetscFunctionReturn(0);
}
PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r,int **c,PetscScalar **v)
{
  const PetscInt *ai, *aj,*ajj;
  PetscInt       nz,rnz,i,j,k,m,bs;
  PetscErrorCode ierr;
  PetscInt       *row,*col;
  PetscScalar    *val;
  Mat_SeqSBAIJ   *aa=(Mat_SeqSBAIJ*)A->data;
  const PetscInt bs2=aa->bs2,mbs=aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool      hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
  if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  ai   = aa->i;
  aj   = aa->j;
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz;
    ierr = PetscMalloc((2*bs2*nz*sizeof(PetscInt)+(bs>1?bs2*nz*sizeof(PetscScalar):0)), &row);CHKERRQ(ierr);
    col  = row + bs2*nz;
    if (bs>1) val = (PetscScalar*)(col + bs2*nz);
    else val = aa->a;

    *r = row; *c = col; *v = val;
  } else {
    if (bs == 1) *v = aa->a;
    row = *r; col = *c; val = *v;
  }

  nz = 0;
  if (bs>1) {
    for (i=0; i<mbs; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            if (ajj[j]>i || k>=m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                row[nz] = i*bs + m + shift;
                col[nz] = ajj[j]*bs + k + shift;
              }
              val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs];
            }
          }
        }
      }
    }
  } else if (reuse == MAT_INITIAL_MATRIX) {
    for (i=0; i<mbs; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        row[nz] = i+shift; col[nz++] = ajj[j] + shift;
      }
    }
    if (nz != aa->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %D != %D",nz,aa->nz);
  }
  if (reuse == MAT_INITIAL_MATRIX) *nnz = nz;
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscInt    *ai,*aj,*ajj,*adiag,M=A->rmap->n;
  PetscInt          nz,rnz,i,j;
  const PetscScalar *av,*v1;
  PetscScalar       *val;
  PetscErrorCode    ierr;
  PetscInt          *row,*col;
  Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
  PetscBool         missing;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
  if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  ierr  = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr);
  ai    = aa->i; aj = aa->j;
  adiag = aa->diag;
  ierr  = MatMissingDiagonal_SeqAIJ(A,&missing,&i);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    /* count nz in the upper triangular part of A */
    nz = 0;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            nz++;
          }
        } else {
          nz += ai[i+1] - adiag[i];
        }
      }
    } else {
      for (i=0; i<M; i++) nz += ai[i+1] - adiag[i];
    }
    *nnz = nz;

    ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr);
    col  = row + nz;
    val  = (PetscScalar*)(col + nz);

    nz = 0;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            row[nz] = i+shift;
            col[nz] = aj[j]+shift;
            val[nz] = av[j];
            nz++;
          }
        } else {
          rnz = ai[i+1] - adiag[i];
          ajj = aj + adiag[i];
          v1  = av + adiag[i];
          for (j=0; j<rnz; j++) {
            row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i=0; i<M; i++) {
        rnz = ai[i+1] - adiag[i];
        ajj = aj + adiag[i];
        v1  = av + adiag[i];
        for (j=0; j<rnz; j++) {
          row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j];
        }
      }
    }
    *r = row; *c = col; *v = val;
  } else {
    nz = 0; val = *v;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            val[nz++] = av[j];
          }
        } else {
          rnz = ai[i+1] - adiag[i];
          v1  = av + adiag[i];
          for (j=0; j<rnz; j++) {
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i=0; i<M; i++) {
        rnz = ai[i+1] - adiag[i];
        v1  = av + adiag[i];
        for (j=0; j<rnz; j++) {
          val[nz++] = v1[j];
        }
      }
    }
  }
  ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
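/* The MPI converters below split each parallel matrix into its diagonal block (A) and
   off-diagonal block (B); garray[] translates B's compact local (block) column indices back to
   global (block) column indices, e.g. an entry of B with local column 0 contributes column
   garray[0] to the triples */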
PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r,int **c, PetscScalar **v)
{
  const PetscInt    *ai,*aj,*bi,*bj,*garray,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt          rstart,nz,bs,i,j,k,m,jj,irow,countA,countB;
  PetscInt          *row,*col;
  const PetscScalar *av,*bv,*v1,*v2;
  PetscScalar       *val;
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ*)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
  const PetscInt    bs2=aa->bs2,mbs=aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
  if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  ierr   = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  rstart = A->rmap->rstart;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;
  av = aa->a;
  bv = bb->a;

  garray = mat->garray;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz + bb->nz;
    ierr = PetscMalloc((2*bs2*nz*sizeof(PetscInt)+bs2*nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr);
    col  = row + bs2*nz;
    val  = (PetscScalar*)(col + bs2*nz);

    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }

  jj = 0; irow = rstart;
  for (i=0; i<mbs; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i]*bs2;
    v2     = bv + bi[i]*bs2;

    if (bs>1) {
      /* A-part */
      for (j=0; j<countA; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            if (rstart + ajj[j]*bs>irow || k>=m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                row[jj] = irow + m + shift; col[jj] = rstart + ajj[j]*bs + k + shift;
              }
              val[jj++] = v1[j*bs2 + m + k*bs];
            }
          }
        }
      }

      /* B-part */
      for (j=0; j < countB; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            if (reuse == MAT_INITIAL_MATRIX) {
              row[jj] = irow + m + shift; col[jj] = garray[bjj[j]]*bs + k + shift;
            }
            val[jj++] = v2[j*bs2 + m + k*bs];
          }
        }
      }
    } else {
      /* A-part */
      for (j=0; j<countA; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
        }
        val[jj++] = v1[j];
      }

      /* B-part */
      for (j=0; j < countB; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
        }
        val[jj++] = v2[j];
      }
    }
    irow+=bs;
  }
  *nnz = jj;
  PetscFunctionReturn(0);
}
PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt          rstart,nz,i,j,jj,irow,countA,countB;
  PetscInt          *row,*col;
  const PetscScalar *av, *bv,*v1,*v2;
  PetscScalar       *val;
  Mat               Ad,Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;

  PetscFunctionBegin;
  ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr);

  aa = (Mat_SeqAIJ*)(Ad)->data;
  bb = (Mat_SeqAIJ*)(Ao)->data;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz + bb->nz;
    *nnz = nz;
    ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr);
    col  = row + nz;
    val  = (PetscScalar*)(col + nz);

    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j=0; j < countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ*)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
  const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj;
  const PetscInt    *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart;
  const PetscInt    bs2=mat->bs2;
  PetscErrorCode    ierr;
  PetscInt          bs,nz,i,j,k,n,jj,irow,countA,countB,idx;
  PetscInt          *row,*col;
  const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2;
  PetscScalar       *val;

  PetscFunctionBegin;
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = bs2*(aa->nz + bb->nz);
    *nnz = nz;
    ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr);
    col  = row + nz;
    val  = (PetscScalar*)(col + nz);

    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }

  jj = 0; irow = rstart;
  for (i=0; i<mbs; i++) {
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    ajj    = aj + ai[i];
    bjj    = bj + bi[i];
    v1     = av + bs2*ai[i];
    v2     = bv + bs2*bi[i];

    idx = 0;
    /* A-part */
    for (k=0; k<countA; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            row[jj] = irow + n + shift;
            col[jj] = rstart + bs*ajj[k] + j + shift;
          }
          val[jj++] = v1[idx++];
        }
      }
    }

    idx = 0;
    /* B-part */
    for (k=0; k<countB; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            row[jj] = irow + n + shift;
            col[jj] = bs*garray[bjj[k]] + j + shift;
          }
          val[jj++] = v2[idx++];
        }
      }
    }
    irow += bs;
  }
  PetscFunctionReturn(0);
}
PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  const PetscInt    *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt          rstart,nz,nza,nzb,i,j,jj,irow,countA,countB;
  PetscInt          *row,*col;
  const PetscScalar *av, *bv,*v1,*v2;
  PetscScalar       *val;
  Mat               Ad,Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
  if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr);

  aa    = (Mat_SeqAIJ*)(Ad)->data;
  bb    = (Mat_SeqAIJ*)(Ao)->data;
  ai    = aa->i;
  aj    = aa->j;
  adiag = aa->diag;
  bi    = bb->i;
  bj    = bb->j;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
    nzb = 0; /* num of upper triangular entries in mat->B */
    for (i=0; i<m; i++) {
      nza   += (ai[i+1] - adiag[i]);
      countB = bi[i+1] - bi[i];
      bjj    = bj + bi[i];
      for (j=0; j<countB; j++) {
        if (garray[bjj[j]] > rstart) nzb++;
      }
    }

    nz   = nza + nzb; /* total nz of upper triangular part of mat */
    *nnz = nz;
    ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr);
    col  = row + nz;
    val  = (PetscScalar*)(col + nz);

    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
    v1     = av + adiag[i];
    countA = ai[i+1] - adiag[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j=0; j < countB; j++) {
      if (garray[bjj[j]] > rstart) {
        if (reuse == MAT_INITIAL_MATRIX) {
          row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
        }
        val[jj++] = v2[j];
      }
    }
    irow++;
  }
  ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
  ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr);
  ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr);
  ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr);
  ierr = PetscFree(mumps->irn);CHKERRQ(ierr);
  ierr = PetscFree(mumps->info);CHKERRQ(ierr);
  ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr);
  mumps->id.job = JOB_END;
  PetscMUMPS_c(mumps);
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  if (mumps->use_petsc_omp_support) { ierr = PetscOmpCtrlDestroy(&mumps->omp_ctrl);CHKERRQ(ierr); }
#endif
  ierr = PetscFree2(mumps->recvcount,mumps->displs);CHKERRQ(ierr);
  ierr = PetscFree(A->data);CHKERRQ(ierr);

  /* clear composed functions */
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS        *mumps=(Mat_MUMPS*)A->data;
  PetscScalar      *array;
  Vec              b_seq;
  IS               is_iden,is_petsc;
  PetscErrorCode   ierr;
  PetscInt         i;
  PetscBool        second_solve = PETSC_FALSE;
  static PetscBool cite1 = PETSC_FALSE,cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1);CHKERRQ(ierr);
  ierr = PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",&cite2);CHKERRQ(ierr);

  if (A->factorerrortype) {
    ierr = PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    ierr = VecSetInf(x);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  mumps->id.ICNTL(20) = 0; /* dense RHS */
  mumps->id.nrhs      = 1;
  b_seq               = mumps->b_seq;
  if (mumps->petsc_size > 1) {
    /* MUMPS only supports a centralized rhs. Scatter b into a sequential rhs vector */
    ierr = VecScatterBegin(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mumps->myid) {ierr = VecGetArray(b_seq,&array);CHKERRQ(ierr);}
  } else { /* petsc_size == 1 */
    ierr = VecCopy(b,x);CHKERRQ(ierr);
    ierr = VecGetArray(x,&array);CHKERRQ(ierr);
  }
  if (!mumps->myid) { /* define rhs on the host */
    mumps->id.nrhs = 1;
    mumps->id.rhs  = (MumpsScalar*)array;
  }

  /*
     handle condensation step of Schur complement (if any)
     We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
     According to the MUMPS (5.0.0) manual, any other value can be harmful during the factorization phase.
     Unless the user provides a valid value for ICNTL(26), the MatSolve and MatMatSolve routines solve the full system.
     This requires an extra call to PetscMUMPS_c and the computation of the factors for S.
  */
  if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
    if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n");
    second_solve = PETSC_TRUE;
    ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
  }
  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

  /* handle expansion step of Schur complement (if any) */
  if (second_solve) {
    ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
  }

  if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
    if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
      /* when id.ICNTL(9) changes, the contents of isol_loc may change (though not its size, lsol_loc), so recreate scat_sol */
      ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
    }
    if (!mumps->scat_sol) { /* create scatter scat_sol */
      ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */
      for (i=0; i<mumps->id.lsol_loc; i++) {
        mumps->id.isol_loc[i] -= 1; /* change Fortran style to C style */
      }
      ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,mumps->id.isol_loc,PETSC_COPY_VALUES,&is_petsc);CHKERRQ(ierr); /* to */
      ierr = VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr);
      ierr = ISDestroy(&is_iden);CHKERRQ(ierr);
      ierr = ISDestroy(&is_petsc);CHKERRQ(ierr);

      mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
    }

    ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }

  if (mumps->petsc_size > 1) {if (!mumps->myid) {ierr = VecRestoreArray(b_seq,&array);CHKERRQ(ierr);}}
  else {ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);}

  ierr = PetscLogFlops(2.0*mumps->id.RINFO(3));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
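/* Per the MUMPS users' guide, ICNTL(9)=1 (the default) solves A x = b, while any other value
   solves A^T x = b; the wrapper below simply toggles it around a call to MatSolve_MUMPS() */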
PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mumps->id.ICNTL(9) = 0;
  ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr);
  mumps->id.ICNTL(9) = 1;
  PetscFunctionReturn(0);
}

PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X)
{
  PetscErrorCode    ierr;
  Mat               Bt = NULL;
  PetscBool         flg, flgT;
  Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
  PetscInt          i,nrhs,M;
  PetscScalar       *array;
  const PetscScalar *rbray;
  PetscInt          lsol_loc,nlsol_loc,*isol_loc,*idxx,*isol_loc_save,iidx = 0;
  PetscScalar       *bray,*sol_loc,*sol_loc_save;
  IS                is_to,is_from;
  PetscInt          k,proc,j,m,myrstart;
  const PetscInt    *rstart;
  Vec               v_mpi,b_seq,msol_loc;
  VecScatter        scat_rhs,scat_sol;
  PetscScalar       *aa;
  PetscInt          spnr,*ia,*ja;
  Mat_MPIAIJ        *b = NULL;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");

  ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (flg) { /* dense B */
    if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution");
    mumps->id.ICNTL(20)= 0; /* dense RHS */
  } else { /* sparse B */
    if (X == B) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_IDN,"X and B must be different matrices");
    ierr = PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);CHKERRQ(ierr);
    if (flgT) { /* input B is the transpose of the actual RHS matrix,
                   because MUMPS requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
      ierr = MatTransposeGetMat(B,&Bt);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix");
    mumps->id.ICNTL(20)= 1; /* sparse RHS */
  }

  ierr = MatGetSize(B,&M,&nrhs);CHKERRQ(ierr);
  mumps->id.nrhs = nrhs;
  mumps->id.lrhs = M;
  mumps->id.rhs  = NULL;

  if (mumps->petsc_size == 1) {
    PetscScalar *aa;
    PetscInt    spnr,*ia,*ja;
    PetscBool   second_solve = PETSC_FALSE;

    ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
    mumps->id.rhs = (MumpsScalar*)array;

    if (!Bt) { /* dense B */
      /* copy B to X */
      ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr);
      ierr = PetscArraycpy(array,rbray,M*nrhs);CHKERRQ(ierr);
      ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr);
    } else { /* sparse B */
      ierr = MatSeqAIJGetArray(Bt,&aa);CHKERRQ(ierr);
      ierr = MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      /* mumps requires ia and ja to start at 1! */
      mumps->id.irhs_ptr    = ia;
      mumps->id.irhs_sparse = ja;
      mumps->id.nz_rhs      = ia[spnr] - 1;
      mumps->id.rhs_sparse  = (MumpsScalar*)aa;
    }
    /* handle condensation step of Schur complement (if any) */
    if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
      second_solve = PETSC_TRUE;
      ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
    }
    /* solve phase */
    /*-------------*/
    mumps->id.job = JOB_SOLVE;
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

    /* handle expansion step of Schur complement (if any) */
    if (second_solve) {
      ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
    }
    if (Bt) { /* sparse B */
      ierr = MatSeqAIJRestoreArray(Bt,&aa);CHKERRQ(ierr);
      ierr = MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
    ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/
  if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n");

  /* create msol_loc to hold mumps local solution */
  isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
  sol_loc_save  = (PetscScalar*)mumps->id.sol_loc;

  lsol_loc  = mumps->id.lsol_loc;
  nlsol_loc = nrhs*lsol_loc; /* length of sol_loc */
  ierr = PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);CHKERRQ(ierr);
  mumps->id.sol_loc  = (MumpsScalar*)sol_loc;
  mumps->id.isol_loc = isol_loc;

  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);CHKERRQ(ierr);

  if (!Bt) { /* dense B */
    /* TODO: Because of the non-contiguous indices, the created vecscatter scat_rhs is not done as an MPI_Gather, resulting in
       very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank
       0, re-arrange B into the desired order, which is a local operation.
     */
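    /* A sketch of that optimization (untested; a hypothetical rework, not the current code path —
       v_mpi is only created further below in the current code):

         VecScatter ctx;
         Vec        b0;   -- sequential vector living on rank 0
         ierr = VecScatterCreateToZero(v_mpi,&ctx,&b0);CHKERRQ(ierr);
         ierr = VecScatterBegin(ctx,v_mpi,b0,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
         ierr = VecScatterEnd(ctx,v_mpi,b0,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
         -- on rank 0: permute b0 from process-major to column-major order, a purely local operation
         ierr = VecScatterDestroy(&ctx);CHKERRQ(ierr);
         ierr = VecDestroy(&b0);CHKERRQ(ierr);
    */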
    /* scatter v_mpi to b_seq because MUMPS only supports a centralized rhs */
    /* wrap dense rhs matrix B into a vector v_mpi */
    ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr);
    ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr);
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr);

    /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
    if (!mumps->myid) {
      PetscInt *idx;
      /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
      ierr = PetscMalloc1(nrhs*M,&idx);CHKERRQ(ierr);
      ierr = MatGetOwnershipRanges(B,&rstart);CHKERRQ(ierr);
      k = 0;
      for (proc=0; proc<mumps->petsc_size; proc++){
        for (j=0; j<nrhs; j++){
          for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i;
        }
      }

      ierr = VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);CHKERRQ(ierr);
      ierr = ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);CHKERRQ(ierr);
    } else {
      ierr = VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);CHKERRQ(ierr);
    }
    ierr = VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);CHKERRQ(ierr);
    ierr = VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = ISDestroy(&is_to);CHKERRQ(ierr);
    ierr = ISDestroy(&is_from);CHKERRQ(ierr);
    ierr = VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

    if (!mumps->myid) { /* define rhs on the host */
      ierr = VecGetArray(b_seq,&bray);CHKERRQ(ierr);
      mumps->id.rhs = (MumpsScalar*)bray;
      ierr = VecRestoreArray(b_seq,&bray);CHKERRQ(ierr);
    }
  } else { /* sparse B */
    b = (Mat_MPIAIJ*)Bt->data;

    /* wrap dense X into a vector v_mpi */
    ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr);
    ierr = MatDenseGetArray(X,&bray);CHKERRQ(ierr);
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(X,&bray);CHKERRQ(ierr);

    if (!mumps->myid) {
      ierr = MatSeqAIJGetArray(b->A,&aa);CHKERRQ(ierr);
      ierr = MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      /* mumps requires ia and ja to start at 1! */
      mumps->id.irhs_ptr    = ia;
      mumps->id.irhs_sparse = ja;
      mumps->id.nz_rhs      = ia[spnr] - 1;
      mumps->id.rhs_sparse  = (MumpsScalar*)aa;
    } else {
      mumps->id.irhs_ptr    = NULL;
      mumps->id.irhs_sparse = NULL;
      mumps->id.nz_rhs      = 0;
      mumps->id.rhs_sparse  = NULL;
    }
  }

  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

  /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
  ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
  ierr = VecPlaceArray(v_mpi,array);CHKERRQ(ierr);

  /* create scatter scat_sol */
  ierr = MatGetOwnershipRanges(X,&rstart);CHKERRQ(ierr);
  /* iidx: index for scattering the mumps solution to petsc X */

  ierr = ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);CHKERRQ(ierr);
  ierr = PetscMalloc1(nlsol_loc,&idxx);CHKERRQ(ierr);
  for (i=0; i<lsol_loc; i++) {
    isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in the j-th vector */

    for (proc=0; proc<mumps->petsc_size; proc++){
      if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) {
        myrstart = rstart[proc];
        k        = isol_loc[i] - myrstart;        /* local index on 1st column of petsc vector X */
        iidx     = k + myrstart*nrhs;             /* maps mumps isol_loc[i] to petsc index in X */
        m        = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */
        break;
      }
    }

    for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m;
  }
  ierr = ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr);
  ierr = VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);CHKERRQ(ierr);
  ierr = VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = ISDestroy(&is_from);CHKERRQ(ierr);
  ierr = ISDestroy(&is_to);CHKERRQ(ierr);
  ierr = VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);

  /* free spaces */
  mumps->id.sol_loc  = (MumpsScalar*)sol_loc_save;
  mumps->id.isol_loc = isol_loc_save;

  ierr = PetscFree2(sol_loc,isol_loc);CHKERRQ(ierr);
  ierr = PetscFree(idxx);CHKERRQ(ierr);
  ierr = VecDestroy(&msol_loc);CHKERRQ(ierr);
  ierr = VecDestroy(&v_mpi);CHKERRQ(ierr);
  if (Bt) {
    if (!mumps->myid) {
      b = (Mat_MPIAIJ*)Bt->data;
      ierr = MatSeqAIJRestoreArray(b->A,&aa);CHKERRQ(ierr);
      ierr = MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
  } else {
    ierr = VecDestroy(&b_seq);CHKERRQ(ierr);
    ierr = VecScatterDestroy(&scat_rhs);CHKERRQ(ierr);
  }
  ierr = VecScatterDestroy(&scat_sol);CHKERRQ(ierr);
  ierr = PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X)
{
  PetscErrorCode ierr;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix");

  /* Create B=Bt^T that uses Bt's data structure */
  ierr = MatCreateTranspose(Bt,&B);CHKERRQ(ierr);

  ierr = MatMatSolve_MUMPS(A,B,X);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if !defined(PETSC_USE_COMPLEX)
/*
  input:
   F:     numeric factor
  output:
   nneg:  total number of negative pivots
   nzero: total number of zero pivots
   npos:  (global dimension of F) - nneg - nzero
*/
PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos)
{
  Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRQ(ierr);
  /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
  if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",mumps->id.ICNTL(13));

  if (nneg) *nneg = mumps->id.INFOG(12);
  if (nzero || npos) {
    if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
    if (nzero) *nzero = mumps->id.INFOG(28);
    if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
  }
  PetscFunctionReturn(0);
}
#endif

PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps)
{
  PetscErrorCode ierr;
  PetscInt       i,nz=0,*irn,*jcn=0;
  PetscScalar    *val=0;
  PetscMPIInt    mpinz,*recvcount=NULL,*displs=NULL;

  PetscFunctionBegin;
  if (mumps->omp_comm_size > 1) {
    if (reuse == MAT_INITIAL_MATRIX) {
      /* master first gathers counts of nonzeros to receive */
      if (mumps->is_omp_master) { ierr = PetscMalloc2(mumps->omp_comm_size,&recvcount,mumps->omp_comm_size,&displs);CHKERRQ(ierr); }
      ierr = PetscMPIIntCast(mumps->nz,&mpinz);CHKERRQ(ierr);
      ierr = MPI_Gather(&mpinz,1,MPI_INT,recvcount,1,MPI_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr);

      /* master allocates memory to receive nonzeros */
      if (mumps->is_omp_master) {
        displs[0] = 0;
        for (i=1; i<mumps->omp_comm_size; i++) displs[i] = displs[i-1] + recvcount[i-1];
        nz   = displs[mumps->omp_comm_size-1] + recvcount[mumps->omp_comm_size-1];
        ierr = PetscMalloc(2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar),&irn);CHKERRQ(ierr);
        jcn  = irn + nz;
        val  = (PetscScalar*)(jcn + nz);
      }

      /* save the gatherv plan */
      mumps->mpinz     = mpinz; /* used as send count */
      mumps->recvcount = recvcount;
      mumps->displs    = displs;

      /* master gathers nonzeros */
      ierr = MPI_Gatherv(mumps->irn,mpinz,MPIU_INT,irn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr);
      ierr = MPI_Gatherv(mumps->jcn,mpinz,MPIU_INT,jcn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr);
      ierr = MPI_Gatherv(mumps->val,mpinz,MPIU_SCALAR,val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr);

      /* master frees its row/col/val and replaces them with bigger arrays */
      if (mumps->is_omp_master) {
        ierr = PetscFree(mumps->irn);CHKERRQ(ierr); /* irn/jcn/val are allocated together so free only irn */
        mumps->nz  = nz; /* it is a sum of mpinz over omp_comm */
        mumps->irn = irn;
        mumps->jcn = jcn;
        mumps->val = val;
      }
    } else {
      ierr = MPI_Gatherv((mumps->is_omp_master?MPI_IN_PLACE:mumps->val),mumps->mpinz,MPIU_SCALAR,mumps->val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}
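/* A small worked example of the gatherv plan above (hypothetical sizes): with omp_comm_size=3 and
   per-rank counts mpinz = {3,5,2}, the master computes displs = {0,3,8} and receives
   nz = 8+2 = 10 nonzeros in total, stored contiguously as irn[10], jcn[10], val[10] */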
PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat_MUMPS      *mumps =(Mat_MUMPS*)(F)->data;
  PetscErrorCode ierr;
  PetscBool      isMPIAIJ;

  PetscFunctionBegin;
  if (mumps->id.INFOG(1) < 0) {
    if (mumps->id.INFOG(1) == -6) {
      ierr = PetscInfo2(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    }
    ierr = PetscInfo2(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr);
  ierr = MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);CHKERRQ(ierr);

  /* numerical factorization phase */
  /*-------------------------------*/
  mumps->id.job = JOB_FACTNUMERIC;
  if (!mumps->id.ICNTL(18)) { /* A is centralized */
    if (!mumps->myid) {
      mumps->id.a = (MumpsScalar*)mumps->val;
    }
  } else {
    mumps->id.a_loc = (MumpsScalar*)mumps->val;
  }
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) {
    if (A->erroriffailure) {
      SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
    } else {
      if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
        ierr = PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
      } else if (mumps->id.INFOG(1) == -13) {
        ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_OUTMEMORY;
      } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
        ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with work array\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_OUTMEMORY;
      } else {
        ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_OTHER;
      }
    }
  }
  if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d\n",mumps->id.ICNTL(16));

  F->assembled    = PETSC_TRUE;
  mumps->matstruc = SAME_NONZERO_PATTERN;
  if (F->schur) { /* reset Schur status to unfactored */
#if defined(PETSC_HAVE_CUDA)
    F->schur->offloadmask = PETSC_OFFLOAD_CPU;
#endif
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
      mumps->id.ICNTL(19) = 2;
      ierr = MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);CHKERRQ(ierr);
    }
    ierr = MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);CHKERRQ(ierr);
  }

  /* just to be sure that the ICNTL(19) value returned by a call to MatMumpsGetIcntl is always consistent */
  if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;

  if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
  if (mumps->petsc_size > 1) {
    PetscInt    lsol_loc;
    PetscScalar *sol_loc;

    ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr);

    /* distributed solution; create x_seq=sol_loc for repeated use */
    if (mumps->x_seq) {
      ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
      ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr);
      ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr);
    }
    lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
    ierr = PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);CHKERRQ(ierr);
    mumps->id.lsol_loc = lsol_loc;
    mumps->id.sol_loc  = (MumpsScalar*)sol_loc;
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr);
  }
  ierr = PetscLogFlops(mumps->id.RINFO(2));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/* Sets MUMPS options from the options database */
PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A)
{
  Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
  PetscErrorCode ierr;
  PetscInt       icntl,info[80],i,ninfo=80;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr);
  if (flg) mumps->id.ICNTL(1) = icntl;
  ierr = PetscOptionsInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warnings","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr);
  if (flg) mumps->id.ICNTL(2) = icntl;
  ierr = PetscOptionsInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr);
  if (flg) mumps->id.ICNTL(3) = icntl;

  ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr);
  if (flg) mumps->id.ICNTL(4) = icntl;
  if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */

  ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr);
  if (flg) mumps->id.ICNTL(6) = icntl;
  ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr);
  if (flg) {
    if (icntl== 1 && mumps->petsc_size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"pivot order set by the user in PERM_IN is not supported by the PETSc/MUMPS interface\n");
    else mumps->id.ICNTL(7) = icntl;
  }

  ierr = PetscOptionsInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr);
  /* ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL);CHKERRQ(ierr);  handled by MatSolveTranspose_MUMPS() */
  ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr);
  if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any Schur data (if any) */
    ierr = MatDestroy(&F->schur);CHKERRQ(ierr);
    ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr);
  }
  /* ierr = PetscOptionsInt("-mat_mumps_icntl_20","ICNTL(20): the format (dense or sparse) of the right-hand sides","None",mumps->id.ICNTL(20),&mumps->id.ICNTL(20),NULL);CHKERRQ(ierr);  -- sparse rhs is not supported in PETSc API */
  /* ierr = PetscOptionsInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL);CHKERRQ(ierr);  we only use distributed solution vectors */

  ierr = PetscOptionsInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr);
  if (mumps->id.ICNTL(24)) {
    mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */
  }

  ierr = PetscOptionsInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr);
matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr); 1478 ierr = PetscOptionsInt("-mat_mumps_icntl_27","ICNTL(27): the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr); 1479 ierr = PetscOptionsInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr); 1480 ierr = PetscOptionsInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr); 1481 /* ierr = PetscOptionsInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */ 1482 ierr = PetscOptionsInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr); 1483 /* ierr = PetscOptionsInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr); -- not supported by PETSc API */ 1484 ierr = PetscOptionsInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr); 1485 ierr = PetscOptionsInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr); 1486 ierr = PetscOptionsInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);CHKERRQ(ierr); 1487 ierr = PetscOptionsInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);CHKERRQ(ierr); 1488 1489 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr); 1490 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr); 1491 ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr); 1492 ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr); 1493 ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr); 1494 ierr = PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);CHKERRQ(ierr); 1495 1496 ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, 256, NULL);CHKERRQ(ierr); 1497 1498 ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr); 1499 if (ninfo) { 1500 if (ninfo > 80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %d must <= 80\n",ninfo); 1501 ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr); 1502 mumps->ninfo = ninfo; 1503 for (i=0; i<ninfo; i++) { 1504 if (info[i] < 0 || info[i]>80) 
PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps)
{
  PetscErrorCode ierr;
  PetscInt       nthreads=0;

  PetscFunctionBegin;
  mumps->petsc_comm = PetscObjectComm((PetscObject)A);
  ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRQ(ierr); /* so that code like "if (!myid)" still works even if mumps_comm is different */

  ierr = PetscOptionsHasName(NULL,NULL,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr);
  if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when the user did not supply one */
  ierr = PetscOptionsGetInt(NULL,NULL,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr);
  if (mumps->use_petsc_omp_support) {
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
    ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr);
    ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr);
#else
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -mat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in the MATSOLVERMUMPS manual\n");
#endif
  } else {
    mumps->omp_comm      = PETSC_COMM_SELF;
    mumps->mumps_comm    = mumps->petsc_comm;
    mumps->is_omp_master = PETSC_TRUE;
  }
  ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRQ(ierr);

  mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
  mumps->id.job = JOB_INIT;
  mumps->id.par = 1; /* the host participates in factorization and solve */
  mumps->id.sym = mumps->sym;

  PetscMUMPS_c(mumps);

  /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
     For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
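     (The broadcast below is over omp_comm, so each slave receives these defaults from its own master rank.)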
  */
  ierr = MPI_Bcast(mumps->id.icntl,60,MPIU_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 Manual Section 9 */
  ierr = MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr);

  mumps->scat_rhs = NULL;
  mumps->scat_sol = NULL;

  /* set PETSc-MUMPS default options - override MUMPS defaults */
  mumps->id.ICNTL(3) = 0;
  mumps->id.ICNTL(4) = 0;
  if (mumps->petsc_size == 1) {
    mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
  } else {
    mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
    mumps->id.ICNTL(20) = 0; /* rhs is in dense format */
    mumps->id.ICNTL(21) = 1; /* distributed solution */
  }

  /* schur */
  mumps->id.size_schur    = 0;
  mumps->id.listvar_schur = NULL;
  mumps->id.schur         = NULL;
  mumps->sizeredrhs       = 0;
  mumps->schur_sol        = NULL;
  mumps->schur_sizesol    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Bcast(mumps->id.infog, 80,MPIU_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 manual p82 */
  ierr = MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr);
  if (mumps->id.INFOG(1) < 0) {
    if (A->erroriffailure) {
      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
    } else {
      if (mumps->id.INFOG(1) == -6) {
        ierr = PetscInfo2(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
      } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
        ierr = PetscInfo2(F,"workspace problem, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_OUTMEMORY;
      } else {
        ierr = PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_OTHER;
      }
    }
  }
  PetscFunctionReturn(0);
}

/* Note the Petsc r(=c) permutation is used when mumps->id.ICNTL(7)==1 with centralized assembled matrix input; otherwise r and c are ignored */
PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
  PetscErrorCode ierr;
  Vec            b;
  const PetscInt M = A->rmap->N;

  PetscFunctionBegin;
  mumps->matstruc = DIFFERENT_NONZERO_PATTERN;

  /* Set MUMPS options from the options database */
  ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr);

  ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr);
  ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr);

  /* analysis phase */
  /*----------------*/
  mumps->id.job = JOB_FACTSYMBOLIC;
  mumps->id.n   = M;
  switch (mumps->id.ICNTL(18)) {
  case 0: /* centralized assembled matrix input */
    if (!mumps->myid) {
      mumps->id.nz = mumps->nz; mumps->id.irn = mumps->irn; mumps->id.jcn = mumps->jcn;
      if (mumps->id.ICNTL(6)>1) {
        mumps->id.a = (MumpsScalar*)mumps->val;
      }
      if (mumps->id.ICNTL(7) == 1) { /* use the user-provided
matrix ordering - assuming r = c ordering */ 1632 /* 1633 PetscBool flag; 1634 ierr = ISEqual(r,c,&flag);CHKERRQ(ierr); 1635 if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"row_perm != col_perm"); 1636 ierr = ISView(r,PETSC_VIEWER_STDOUT_SELF); 1637 */ 1638 if (!mumps->myid) { 1639 const PetscInt *idx; 1640 PetscInt i,*perm_in; 1641 1642 ierr = PetscMalloc1(M,&perm_in);CHKERRQ(ierr); 1643 ierr = ISGetIndices(r,&idx);CHKERRQ(ierr); 1644 1645 mumps->id.perm_in = perm_in; 1646 for (i=0; i<M; i++) perm_in[i] = idx[i]+1; /* perm_in[]: start from 1, not 0! */ 1647 ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr); 1648 } 1649 } 1650 } 1651 break; 1652 case 3: /* distributed assembled matrix input (size>1) */ 1653 mumps->id.nz_loc = mumps->nz; 1654 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1655 if (mumps->id.ICNTL(6)>1) { 1656 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1657 } 1658 /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 1659 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1660 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1661 ierr = VecDestroy(&b);CHKERRQ(ierr); 1662 break; 1663 } 1664 PetscMUMPS_c(mumps); 1665 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1666 1667 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1668 F->ops->solve = MatSolve_MUMPS; 1669 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1670 F->ops->matsolve = MatMatSolve_MUMPS; 1671 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1672 PetscFunctionReturn(0); 1673 } 1674 1675 /* Note the Petsc r and c permutations are ignored */ 1676 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1677 { 1678 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1679 PetscErrorCode ierr; 1680 Vec b; 1681 const PetscInt M = A->rmap->N; 1682 1683 PetscFunctionBegin; 1684 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1685 1686 /* Set MUMPS options from the options database */ 1687 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1688 1689 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1690 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1691 1692 /* analysis phase */ 1693 /*----------------*/ 1694 mumps->id.job = JOB_FACTSYMBOLIC; 1695 mumps->id.n = M; 1696 switch (mumps->id.ICNTL(18)) { 1697 case 0: /* centralized assembled matrix input */ 1698 if (!mumps->myid) { 1699 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1700 if (mumps->id.ICNTL(6)>1) { 1701 mumps->id.a = (MumpsScalar*)mumps->val; 1702 } 1703 } 1704 break; 1705 case 3: /* distributed assembled matrix input (size>1) */ 1706 mumps->id.nz_loc = mumps->nz; 1707 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1708 if (mumps->id.ICNTL(6)>1) { 1709 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1710 } 1711 /* MUMPS only supports centralized rhs. 
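        With the dense rhs format (ICNTL(20)=0, set in PetscInitializeMUMPS) the rhs must reside entirely on the host, hence the gather onto rank 0 below.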
Create scatter scat_rhs for repeated use in MatSolve() */ 1712 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1713 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1714 ierr = VecDestroy(&b);CHKERRQ(ierr); 1715 break; 1716 } 1717 PetscMUMPS_c(mumps); 1718 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1719 1720 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 1721 F->ops->solve = MatSolve_MUMPS; 1722 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 1723 PetscFunctionReturn(0); 1724 } 1725 1726 /* Note the Petsc r permutation and factor info are ignored */ 1727 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info) 1728 { 1729 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1730 PetscErrorCode ierr; 1731 Vec b; 1732 const PetscInt M = A->rmap->N; 1733 1734 PetscFunctionBegin; 1735 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1736 1737 /* Set MUMPS options from the options database */ 1738 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1739 1740 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr); 1741 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1742 1743 /* analysis phase */ 1744 /*----------------*/ 1745 mumps->id.job = JOB_FACTSYMBOLIC; 1746 mumps->id.n = M; 1747 switch (mumps->id.ICNTL(18)) { 1748 case 0: /* centralized assembled matrix input */ 1749 if (!mumps->myid) { 1750 mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn; 1751 if (mumps->id.ICNTL(6)>1) { 1752 mumps->id.a = (MumpsScalar*)mumps->val; 1753 } 1754 } 1755 break; 1756 case 3: /* distributed assembled matrix input (size>1) */ 1757 mumps->id.nz_loc = mumps->nz; 1758 mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn; 1759 if (mumps->id.ICNTL(6)>1) { 1760 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1761 } 1762 /* MUMPS only supports centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 1763 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1764 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1765 ierr = VecDestroy(&b);CHKERRQ(ierr); 1766 break; 1767 } 1768 PetscMUMPS_c(mumps); 1769 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 1770 1771 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 1772 F->ops->solve = MatSolve_MUMPS; 1773 F->ops->solvetranspose = MatSolve_MUMPS; 1774 F->ops->matsolve = MatMatSolve_MUMPS; 1775 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 1776 #if defined(PETSC_USE_COMPLEX) 1777 F->ops->getinertia = NULL; 1778 #else 1779 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 1780 #endif 1781 PetscFunctionReturn(0); 1782 } 1783 1784 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) 1785 { 1786 PetscErrorCode ierr; 1787 PetscBool iascii; 1788 PetscViewerFormat format; 1789 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1790 1791 PetscFunctionBegin; 1792 /* check if matrix is mumps type */ 1793 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0); 1794 1795 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1796 if (iascii) { 1797 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1798 if (format == PETSC_VIEWER_ASCII_INFO) { 1799 ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr); 1800 ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",mumps->id.sym);CHKERRQ(ierr); 1801 ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",mumps->id.par);CHKERRQ(ierr); 1802 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d \n",mumps->id.ICNTL(1));CHKERRQ(ierr); 1803 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d \n",mumps->id.ICNTL(2));CHKERRQ(ierr); 1804 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d \n",mumps->id.ICNTL(3));CHKERRQ(ierr); 1805 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",mumps->id.ICNTL(4));CHKERRQ(ierr); 1806 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",mumps->id.ICNTL(5));CHKERRQ(ierr); 1807 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",mumps->id.ICNTL(6));CHKERRQ(ierr); 1808 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d \n",mumps->id.ICNTL(7));CHKERRQ(ierr); 1809 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d \n",mumps->id.ICNTL(8));CHKERRQ(ierr); 1810 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",mumps->id.ICNTL(10));CHKERRQ(ierr); 1811 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",mumps->id.ICNTL(11));CHKERRQ(ierr); 1812 if (mumps->id.ICNTL(11)>0) { 1813 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr); 1814 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr); 1815 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6));CHKERRQ(ierr); 1816 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr); 1817 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr); 1818 ierr = PetscViewerASCIIPrintf(viewer," 
RINFOG(10),RINFOG(11) (condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr);
      }
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",mumps->id.ICNTL(12));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (efficiency control): %d \n",mumps->id.ICNTL(13));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",mumps->id.ICNTL(14));CHKERRQ(ierr);
      /* ICNTL(15-17) not used */
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",mumps->id.ICNTL(18));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d \n",mumps->id.ICNTL(19));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(20) (rhs sparse pattern): %d \n",mumps->id.ICNTL(20));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d \n",mumps->id.ICNTL(21));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d \n",mumps->id.ICNTL(22));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory that can be allocated locally):%d \n",mumps->id.ICNTL(23));CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d \n",mumps->id.ICNTL(24));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d \n",mumps->id.ICNTL(25));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for rhs or solution): %d \n",mumps->id.ICNTL(26));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(27) (experimental parameter): %d \n",mumps->id.ICNTL(27));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d \n",mumps->id.ICNTL(28));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d \n",mumps->id.ICNTL(29));CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d \n",mumps->id.ICNTL(30));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(31) (factors are discarded in the solve phase): %d \n",mumps->id.ICNTL(31));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d \n",mumps->id.ICNTL(33));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d \n",mumps->id.ICNTL(35));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(36) (choice of BLR factorization variant): %d \n",mumps->id.ICNTL(36));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," ICNTL(38) (estimated compression rate of LU factors): %d \n",mumps->id.ICNTL(38));CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," CNTL(7) (dropping parameter for BLR): %g \n",mumps->id.CNTL(7));CHKERRQ(ierr);

      /* information local to each processor */
      ierr = PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size (in MB) of MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer, " INFO(16) (size (in MB) of MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);

      if (mumps->ninfo && mumps->ninfo <= 80){
        PetscInt i;
        for (i=0; i<mumps->ninfo; i++){
          ierr = PetscViewerASCIIPrintf(viewer, " INFO(%d): \n",mumps->info[i]);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(mumps->info[i]));CHKERRQ(ierr);
          ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
        }
      }
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);

      if (!mumps->myid) { /* information from the host */
        ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr);

        ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(3));CHKERRQ(ierr);
%d \n",mumps->id.INFOG(4));CHKERRQ(ierr); 1894 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",mumps->id.INFOG(5));CHKERRQ(ierr); 1895 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",mumps->id.INFOG(6));CHKERRQ(ierr); 1896 ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively use after analysis): %d \n",mumps->id.INFOG(7));CHKERRQ(ierr); 1897 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",mumps->id.INFOG(8));CHKERRQ(ierr); 1898 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",mumps->id.INFOG(9));CHKERRQ(ierr); 1899 ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d \n",mumps->id.INFOG(10));CHKERRQ(ierr); 1900 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d \n",mumps->id.INFOG(11));CHKERRQ(ierr); 1901 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",mumps->id.INFOG(12));CHKERRQ(ierr); 1902 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",mumps->id.INFOG(13));CHKERRQ(ierr); 1903 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",mumps->id.INFOG(14));CHKERRQ(ierr); 1904 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",mumps->id.INFOG(15));CHKERRQ(ierr); 1905 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",mumps->id.INFOG(16));CHKERRQ(ierr); 1906 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",mumps->id.INFOG(17));CHKERRQ(ierr); 1907 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",mumps->id.INFOG(18));CHKERRQ(ierr); 1908 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",mumps->id.INFOG(19));CHKERRQ(ierr); 1909 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",mumps->id.INFOG(20));CHKERRQ(ierr); 1910 ierr = PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",mumps->id.INFOG(21));CHKERRQ(ierr); 1911 ierr = PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",mumps->id.INFOG(22));CHKERRQ(ierr); 1912 ierr = PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",mumps->id.INFOG(23));CHKERRQ(ierr); 1913 ierr = PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",mumps->id.INFOG(24));CHKERRQ(ierr); 1914 ierr = PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",mumps->id.INFOG(25));CHKERRQ(ierr); 1915 ierr = 
PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));CHKERRQ(ierr); 1916 ierr = PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));CHKERRQ(ierr); 1917 ierr = PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));CHKERRQ(ierr); 1918 ierr = PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));CHKERRQ(ierr); 1919 ierr = PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));CHKERRQ(ierr); 1920 ierr = PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));CHKERRQ(ierr); 1921 ierr = PetscViewerASCIIPrintf(viewer," INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));CHKERRQ(ierr); 1922 ierr = PetscViewerASCIIPrintf(viewer," INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(36));CHKERRQ(ierr); 1923 ierr = PetscViewerASCIIPrintf(viewer," INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d \n",mumps->id.INFOG(37));CHKERRQ(ierr); 1924 ierr = PetscViewerASCIIPrintf(viewer," INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(38));CHKERRQ(ierr); 1925 ierr = PetscViewerASCIIPrintf(viewer," INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d \n",mumps->id.INFOG(39));CHKERRQ(ierr); 1926 } 1927 } 1928 } 1929 PetscFunctionReturn(0); 1930 } 1931 1932 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info) 1933 { 1934 Mat_MUMPS *mumps =(Mat_MUMPS*)A->data; 1935 1936 PetscFunctionBegin; 1937 info->block_size = 1.0; 1938 info->nz_allocated = mumps->id.INFOG(20); 1939 info->nz_used = mumps->id.INFOG(20); 1940 info->nz_unneeded = 0.0; 1941 info->assemblies = 0.0; 1942 info->mallocs = 0.0; 1943 info->memory = 0.0; 1944 info->fill_ratio_given = 0; 1945 info->fill_ratio_needed = 0; 1946 info->factor_mallocs = 0; 1947 PetscFunctionReturn(0); 1948 } 1949 1950 /* -------------------------------------------------------------------------------------------*/ 1951 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 1952 { 1953 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1954 const PetscScalar *arr; 1955 const PetscInt *idxs; 1956 PetscInt size,i; 1957 PetscErrorCode ierr; 1958 1959 PetscFunctionBegin; 1960 ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr); 1961 if (mumps->petsc_size > 1) { 1962 PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */ 1963 1964 ls = mumps->myid ? (size ? 
PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
    ierr = MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);CHKERRQ(ierr);
    if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported by PETSc\n");
  }

  /* Schur complement matrix */
  ierr = MatDestroy(&F->schur);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(F->schur,&arr);CHKERRQ(ierr);
  mumps->id.schur      = (MumpsScalar*)arr;
  mumps->id.size_schur = size;
  mumps->id.schur_lld  = size;
  ierr = MatDenseRestoreArrayRead(F->schur,&arr);CHKERRQ(ierr);
  if (mumps->sym == 1) {
    ierr = MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);
  }

  /* MUMPS expects Fortran style indices */
  ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&mumps->id.listvar_schur);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr);
  ierr = PetscArraycpy(mumps->id.listvar_schur,idxs,size);CHKERRQ(ierr);
  for (i=0;i<size;i++) mumps->id.listvar_schur[i]++;
  ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr);
  if (mumps->petsc_size > 1) {
    mumps->id.ICNTL(19) = 1; /* MUMPS returns the Schur complement centralized on the host */
  } else {
    if (F->factortype == MAT_FACTOR_LU) {
      mumps->id.ICNTL(19) = 3; /* MUMPS returns the full matrix */
    } else {
      mumps->id.ICNTL(19) = 2; /* MUMPS returns the lower triangular part */
    }
  }
  /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
  mumps->id.ICNTL(26) = -1;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------------------------*/
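/*
   A minimal sketch of one intended call sequence (assuming F was obtained with
   MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F) and the index set is holds
   the Schur variables; names are hypothetical and error checking is omitted):

     MatFactorSetSchurIS(F,is);                    must precede the factorization
     MatLUFactorSymbolic(F,A,r,c,&info);
     MatLUFactorNumeric(F,A,&info);
     MatFactorCreateSchurComplement(F,&S);         S is a dense copy of the Schur complement
*/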
PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S)
{
  Mat            St;
  Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
  PetscScalar    *array;
#if defined(PETSC_USE_COMPLEX)
  PetscScalar    im = PetscSqrtScalar((PetscScalar)-1.0);
#endif
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
  ierr = MatCreate(PETSC_COMM_SELF,&St);CHKERRQ(ierr);
  ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);CHKERRQ(ierr);
  ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(St);CHKERRQ(ierr);
  ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr);
  if (!mumps->sym) { /* MUMPS always returns a full matrix */
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
      PetscInt i,j,N=mumps->id.size_schur;
      for (i=0;i<N;i++) {
        for (j=0;j<N;j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i*N+j];
#else
          PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
#endif
          array[j*N+i] = val;
        }
      }
    } else { /* stored by columns */
      ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr);
    }
  } else { /* either full or lower-triangular (not packed) */
    if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
      PetscInt i,j,N=mumps->id.size_schur;
      for (i=0;i<N;i++) {
        for (j=i;j<N;j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i*N+j];
#else
          PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
#endif
          array[i*N+j] = val;
          array[j*N+i] = val;
        }
      }
    } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
      ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr);
    } else { /* ICNTL(19) == 1 lower triangular stored by rows */
      PetscInt i,j,N=mumps->id.size_schur;
      for (i=0;i<N;i++) {
        for (j=0;j<i+1;j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i*N+j];
#else
          PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
#endif
          array[i*N+j] = val;
          array[j*N+i] = val;
        }
      }
    }
  }
  ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr);
  *S = St;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------------------------*/
PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  mumps->id.ICNTL(icntl) = ival;
  PetscFunctionReturn(0);
}

PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  *ival = mumps->id.ICNTL(icntl);
  PetscFunctionReturn(0);
}

/*@
  MatMumpsSetIcntl - Set MUMPS parameter ICNTL()

  Logically Collective on Mat

  Input Parameters:
+ F - the factored matrix obtained by calling MatGetFactor() from the PETSc-MUMPS interface
. icntl - index of MUMPS parameter array ICNTL()
- ival - value of MUMPS ICNTL(icntl)

  Options Database:
. -mat_mumps_icntl_<icntl> <ival>

  Level: beginner

  References:
.
MUMPS Users' Guide 2108 2109 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2110 @*/ 2111 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival) 2112 { 2113 PetscErrorCode ierr; 2114 2115 PetscFunctionBegin; 2116 PetscValidType(F,1); 2117 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2118 PetscValidLogicalCollectiveInt(F,icntl,2); 2119 PetscValidLogicalCollectiveInt(F,ival,3); 2120 ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr); 2121 PetscFunctionReturn(0); 2122 } 2123 2124 /*@ 2125 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2126 2127 Logically Collective on Mat 2128 2129 Input Parameters: 2130 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2131 - icntl - index of MUMPS parameter array ICNTL() 2132 2133 Output Parameter: 2134 . ival - value of MUMPS ICNTL(icntl) 2135 2136 Level: beginner 2137 2138 References: 2139 . MUMPS Users' Guide 2140 2141 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2142 @*/ 2143 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival) 2144 { 2145 PetscErrorCode ierr; 2146 2147 PetscFunctionBegin; 2148 PetscValidType(F,1); 2149 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2150 PetscValidLogicalCollectiveInt(F,icntl,2); 2151 PetscValidIntPointer(ival,3); 2152 ierr = PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2153 PetscFunctionReturn(0); 2154 } 2155 2156 /* -------------------------------------------------------------------------------------------*/ 2157 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val) 2158 { 2159 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2160 2161 PetscFunctionBegin; 2162 mumps->id.CNTL(icntl) = val; 2163 PetscFunctionReturn(0); 2164 } 2165 2166 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val) 2167 { 2168 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2169 2170 PetscFunctionBegin; 2171 *val = mumps->id.CNTL(icntl); 2172 PetscFunctionReturn(0); 2173 } 2174 2175 /*@ 2176 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2177 2178 Logically Collective on Mat 2179 2180 Input Parameters: 2181 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2182 . icntl - index of MUMPS parameter array CNTL() 2183 - val - value of MUMPS CNTL(icntl) 2184 2185 Options Database: 2186 . -mat_mumps_cntl_<icntl> <val> 2187 2188 Level: beginner 2189 2190 References: 2191 . 
MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidLogicalCollectiveInt(F,icntl,2);
  PetscValidLogicalCollectiveReal(F,val,3);
  ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
  MatMumpsGetCntl - Get MUMPS parameter CNTL()

  Logically Collective on Mat

  Input Parameters:
+ F - the factored matrix obtained by calling MatGetFactor() from the PETSc-MUMPS interface
- icntl - index of MUMPS parameter array CNTL()

  Output Parameter:
. val - value of MUMPS CNTL(icntl)

  Level: beginner

  References:
. MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidLogicalCollectiveInt(F,icntl,2);
  PetscValidRealPointer(val,3);
  ierr = PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  *info = mumps->id.INFO(icntl);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  *infog = mumps->id.INFOG(icntl);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  *rinfo = mumps->id.RINFO(icntl);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  *rinfog = mumps->id.RINFOG(icntl);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS)
{
  PetscErrorCode ierr;
  Mat            Bt = NULL,Btseq = NULL;
  PetscBool      flg;
  Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
  PetscScalar    *aa;
  PetscInt       spnr,*ia,*ja;

  PetscFunctionBegin;
  PetscValidIntPointer(spRHS,2);
  ierr = PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatTransposeGetMat(spRHS,&Bt);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be of type MATTRANSPOSEMAT");

  ierr = MatMumpsSetIcntl(F,30,1);CHKERRQ(ierr);

  if (mumps->petsc_size > 1) {
    Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data;
    Btseq = b->A;
  } else {
    Btseq = Bt;
  }

  if (!mumps->myid) {
    ierr = MatSeqAIJGetArray(Btseq,&aa);CHKERRQ(ierr);
    ierr = MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");

    mumps->id.irhs_ptr    = ia;
    mumps->id.irhs_sparse = ja;
    mumps->id.nz_rhs      = ia[spnr] - 1;
    mumps->id.rhs_sparse  = (MumpsScalar*)aa;
  } else {
    mumps->id.irhs_ptr    = NULL;
    mumps->id.irhs_sparse = NULL;
    mumps->id.nz_rhs      = 0;
    mumps->id.rhs_sparse  = NULL;
  }
  mumps->id.ICNTL(20) = 1; /* rhs is sparse */
  mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */

  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0)
    SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));

  if (!mumps->myid) {
    ierr = MatSeqAIJRestoreArray(Btseq,&aa);CHKERRQ(ierr);
    ierr = MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
  }
  PetscFunctionReturn(0);
}
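/*
   A minimal sketch (assuming F is a MUMPS factor of A and spRHST is a SeqAIJ
   matrix held on process 0 whose nonzero pattern selects the wanted entries
   of inv(A); error checking omitted):

     Mat spRHS;
     MatCreateTranspose(spRHST,&spRHS);
     MatMumpsGetInverse(F,spRHS);        the selected entries overwrite the values of spRHST
     MatDestroy(&spRHS);

   MatMumpsGetInverseTranspose() below wraps exactly this sequence for AIJ input.
*/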
/*@
  MatMumpsGetInverse - Get user-specified set of entries in inverse of A

  Logically Collective on Mat

  Input Parameters:
+ F - the factored matrix obtained by calling MatGetFactor() from the PETSc-MUMPS interface
- spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding the specified indices on process 0

  Output Parameter:
. spRHS - requested entries of inverse of A

  Level: beginner

  References:
. MUMPS Users' Guide

.seealso: MatGetFactor(), MatCreateTranspose()
@*/
PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST)
{
  PetscErrorCode ierr;
  Mat            spRHS;

  PetscFunctionBegin;
  ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr);
  ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr);
  ierr = MatDestroy(&spRHS);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
  MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T

  Logically Collective on Mat

  Input Parameters:
+ F - the factored matrix of A obtained by calling MatGetFactor() from the PETSc-MUMPS interface
- spRHST - sequential sparse matrix in MATAIJ format holding the specified indices of A^T on process 0

  Output Parameter:
. spRHST - requested entries of inverse of A^T

  Level: beginner

  References:
.
MUMPS Users' Guide 2392 2393 .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse() 2394 @*/ 2395 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST) 2396 { 2397 PetscErrorCode ierr; 2398 PetscBool flg; 2399 2400 PetscFunctionBegin; 2401 PetscValidType(F,1); 2402 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2403 ierr = PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 2404 if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix"); 2405 2406 ierr = PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));CHKERRQ(ierr); 2407 PetscFunctionReturn(0); 2408 } 2409 2410 /*@ 2411 MatMumpsGetInfo - Get MUMPS parameter INFO() 2412 2413 Logically Collective on Mat 2414 2415 Input Parameters: 2416 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2417 - icntl - index of MUMPS parameter array INFO() 2418 2419 Output Parameter: 2420 . ival - value of MUMPS INFO(icntl) 2421 2422 Level: beginner 2423 2424 References: 2425 . MUMPS Users' Guide 2426 2427 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2428 @*/ 2429 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival) 2430 { 2431 PetscErrorCode ierr; 2432 2433 PetscFunctionBegin; 2434 PetscValidType(F,1); 2435 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2436 PetscValidIntPointer(ival,3); 2437 ierr = PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2438 PetscFunctionReturn(0); 2439 } 2440 2441 /*@ 2442 MatMumpsGetInfog - Get MUMPS parameter INFOG() 2443 2444 Logically Collective on Mat 2445 2446 Input Parameters: 2447 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2448 - icntl - index of MUMPS parameter array INFOG() 2449 2450 Output Parameter: 2451 . ival - value of MUMPS INFOG(icntl) 2452 2453 Level: beginner 2454 2455 References: 2456 . MUMPS Users' Guide 2457 2458 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2459 @*/ 2460 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival) 2461 { 2462 PetscErrorCode ierr; 2463 2464 PetscFunctionBegin; 2465 PetscValidType(F,1); 2466 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2467 PetscValidIntPointer(ival,3); 2468 ierr = PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2469 PetscFunctionReturn(0); 2470 } 2471 2472 /*@ 2473 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 2474 2475 Logically Collective on Mat 2476 2477 Input Parameters: 2478 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2479 - icntl - index of MUMPS parameter array RINFO() 2480 2481 Output Parameter: 2482 . val - value of MUMPS RINFO(icntl) 2483 2484 Level: beginner 2485 2486 References: 2487 . 
MUMPS Users' Guide 2488 2489 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2490 @*/ 2491 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val) 2492 { 2493 PetscErrorCode ierr; 2494 2495 PetscFunctionBegin; 2496 PetscValidType(F,1); 2497 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2498 PetscValidRealPointer(val,3); 2499 ierr = PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2500 PetscFunctionReturn(0); 2501 } 2502 2503 /*@ 2504 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() 2505 2506 Logically Collective on Mat 2507 2508 Input Parameters: 2509 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2510 - icntl - index of MUMPS parameter array RINFOG() 2511 2512 Output Parameter: 2513 . val - value of MUMPS RINFOG(icntl) 2514 2515 Level: beginner 2516 2517 References: 2518 . MUMPS Users' Guide 2519 2520 .seealso: MatGetFactor(), MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2521 @*/ 2522 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val) 2523 { 2524 PetscErrorCode ierr; 2525 2526 PetscFunctionBegin; 2527 PetscValidType(F,1); 2528 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2529 PetscValidRealPointer(val,3); 2530 ierr = PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2531 PetscFunctionReturn(0); 2532 } 2533 2534 /*MC 2535 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 2536 distributed and sequential matrices via the external package MUMPS. 2537 2538 Works with MATAIJ and MATSBAIJ matrices 2539 2540 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 2541 2542 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below. 2543 2544 Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver 2545 2546 Options Database Keys: 2547 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 2548 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 2549 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 2550 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 2551 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 2552 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis 2553 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 2554 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 2555 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 2556 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 2557 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 2558 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 2559 . 
-mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement
. -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
. -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can be allocated per processor
. -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
. -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis
. -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix is present
. -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering
. -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
. -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
. -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
. -mat_mumps_icntl_33 - ICNTL(33): compute determinant
. -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
. -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
. -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
. -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold
. -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement
. -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
. -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
. -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
. -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
- -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) were called before calling MUMPS.
   The default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.

  Level: beginner

  Notes:
  MUMPS Cholesky does not handle (complex) Hermitian matrices (see http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf), so using it on a Hermitian matrix will generate an error.

  When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling
$          KSPGetPC(ksp,&pc);
$          PCFactorGetMatrix(pc,&mat);
$          MatMumpsGetInfo(mat,....);
$          MatMumpsGetInfog(mat,....); etc.
  Or you can run with -ksp_error_if_not_converged and the program will be stopped and the information printed in the error message.

  There are two modes to run MUMPS/PETSc with OpenMP:

$     Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
$     threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".

$     -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
$     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"

  To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
  (i.e., the PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc
  (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available).
   To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
   (i.e., the PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc
   (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
   libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
   (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).

   If you run your code through a job submission system, be aware of caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain the MPI
   processes on each compute node. Listing the processes in ascending rank order, we split the processes on a node into consecutive groups of
   size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank; the others in the omp_comm
   are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. We free the CPUs assigned to slaves (which might be set
   by CPU binding policies in job scripts) and make those CPUs available to the master so that the OpenMP threads spawned by MUMPS can run on them.
   On a multi-socket compute node, MPI rank mapping becomes an issue. Continuing the example above, suppose your compute node has two sockets.
   If you interleave MPI ranks across the two sockets, in other words, even ranks are placed on socket 0 and odd ranks on socket 1, and you bind
   MPI ranks to cores, then with -mat_mumps_use_omp_threads 16 a master rank (and the threads it spawns) will use half the cores in socket 0 and half
   the cores in socket 1, which hurts data locality. If instead you map MPI ranks consecutively onto the two sockets, the problem does not arise.
   Therefore, when you use -mat_mumps_use_omp_threads, keep an eye on your MPI rank mapping and CPU binding.
   For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
   examine the resulting mapping.

   PETSc does not control thread binding in MUMPS. So to get the best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts,
   for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts, as PETSc
   calls omp_set_num_threads(m) internally before calling MUMPS. A job script fragment combining these settings is sketched below.
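   For illustration only, a hypothetical Slurm fragment for the two-node, 32-cores-per-node example above (the program name and the counts are
   assumptions; adapt them to your machine):
$     export OMP_PLACES=threads
$     export OMP_PROC_BIND=spread
$     srun --cpu-bind=verbose -m block:block -n 64 ./test -mat_mumps_use_omp_threads 16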
   References:
+   1. - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
-   2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.

.seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCGetFactor(), PCFactorGetMatrix()

M*/
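/*
   A minimal usage sketch (illustrative, not part of this file): obtaining a MUMPS LU
   factorization directly through the MatGetFactor() interface registered below,
   adjusting an ICNTL option, and solving. The matrix A, the vectors b and x, and the
   index sets isrow/iscol are assumed to exist; error checking is abbreviated.

     Mat           F;
     MatFactorInfo info;

     ierr = MatFactorInfoInitialize(&info);CHKERRQ(ierr);
     ierr = MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);CHKERRQ(ierr);
     ierr = MatMumpsSetIcntl(F,14,30);CHKERRQ(ierr);  /* e.g. raise ICNTL(14), the working-space increase, to 30% */
     ierr = MatLUFactorSymbolic(F,A,isrow,iscol,&info);CHKERRQ(ierr);
     ierr = MatLUFactorNumeric(F,A,&info);CHKERRQ(ierr);
     ierr = MatSolve(F,b,x);CHKERRQ(ierr);
     ierr = MatDestroy(&F);CHKERRQ(ierr);
*/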
static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERMUMPS;
  PetscFunctionReturn(0);
}

/* MatGetFactor for Seq and MPI AIJ matrices */
static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqAIJ;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
    mumps->sym = 0; /* MUMPS SYM=0: unsymmetric */
  } else {
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
    B->factortype                  = MAT_FACTOR_CHOLESKY;
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
#if defined(PETSC_USE_COMPLEX)
    mumps->sym = 2; /* MUMPS SYM=2: general symmetric */
#else
    if (A->spd_set && A->spd) mumps->sym = 1; /* MUMPS SYM=1: symmetric positive definite */
    else                      mumps->sym = 2;
#endif
  }

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}
/* MatGetFactor for Seq and MPI SBAIJ matrices */
static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqSBAIJ;

  PetscFunctionBegin;
  if (ftype != MAT_FACTOR_CHOLESKY) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Cannot use PETSc SBAIJ matrices with MUMPS LU, use AIJ matrix");
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr);
  /* Create the factorization matrix */
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
  if (isSeqSBAIJ) {
    mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
  } else {
    mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
  }

  B->ops->getinfo                = MatGetInfo_External;
  B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
  B->ops->view                   = MatView_MUMPS;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  B->factortype = MAT_FACTOR_CHOLESKY;
#if defined(PETSC_USE_COMPLEX)
  mumps->sym = 2; /* MUMPS SYM=2: general symmetric */
#else
  if (A->spd_set && A->spd) mumps->sym = 1; /* MUMPS SYM=1: symmetric positive definite */
  else                      mumps->sym = 2;
#endif

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqBAIJ;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
    mumps->sym = 0; /* MUMPS SYM=0: unsymmetric */
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead\n");

  B->ops->getinfo = MatGetInfo_External;
  B->ops->view    = MatView_MUMPS;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}
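/*
   Illustration only (assuming a hypothetical test program ./test whose system matrix
   is MATSBAIJ): the factories above encode per-type restrictions, i.e. SBAIJ supports
   only Cholesky and BAIJ only LU, so for the SBAIJ case one would run, e.g.,

     ./test -pc_type cholesky -pc_factor_mat_solver_type mumps

   whereas AIJ matrices accept both -pc_type lu and -pc_type cholesky.
*/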
/* MatGetFactor for Seq and MPI SELL matrices */
static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqSELL;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
    else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
    mumps->sym = 0; /* MUMPS SYM=0: unsymmetric */
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}

PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
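/*
   A sketch of selecting the solver registered above programmatically, instead of via
   -pc_factor_mat_solver_type mumps on the command line ("ksp" is an assumed,
   already-created KSP; error checking abbreviated):

     PC pc;

     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCLU);CHKERRQ(ierr);
     ierr = PCFactorSetMatSolverType(pc,MATSOLVERMUMPS);CHKERRQ(ierr);
*/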