/*
   Provides an interface to the MUMPS sparse solver
*/
#include <petscpkg_version.h>
#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sell/mpi/mpisell.h>

EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#include <cmumps_c.h>
#else
#include <zmumps_c.h>
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#include <smumps_c.h>
#else
#include <dmumps_c.h>
#endif
#endif
EXTERN_C_END
#define JOB_INIT -1
#define JOB_FACTSYMBOLIC 1
#define JOB_FACTNUMERIC 2
#define JOB_SOLVE 3
#define JOB_END -2

/* calls to MUMPS */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c cmumps_c
#else
#define MUMPS_c zmumps_c
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c smumps_c
#else
#define MUMPS_c dmumps_c
#endif
#endif

/* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
   number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
   naming convention in PetscMPIInt, PetscBLASInt etc.
*/
typedef MUMPS_INT PetscMUMPSInt;

#if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
#if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
#error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
#endif
#else
#if defined(INTSIZE64) /* INTSIZE64 is a command-line macro used to build MUMPS in full 64-bit mode */
#error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
#endif
#endif

#define MPIU_MUMPSINT       MPI_INT
#define PETSC_MUMPS_INT_MAX 2147483647
#define PETSC_MUMPS_INT_MIN -2147483648

/* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers */
PETSC_STATIC_INLINE PetscErrorCode PetscMUMPSIntCast(PetscInt a,PetscMUMPSInt *b)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_64BIT_INDICES) && PetscUnlikelyDebug(a > PETSC_MUMPS_INT_MAX || a < PETSC_MUMPS_INT_MIN)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
  *b = (PetscMUMPSInt)(a);
  PetscFunctionReturn(0);
}

/* Put these utility routines here since they are only used in this file */
PETSC_STATIC_INLINE PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject,const char opt[],const char text[],const char man[],PetscMUMPSInt currentvalue,PetscMUMPSInt *value,PetscBool *set,PetscMUMPSInt lb,PetscMUMPSInt ub)
{
  PetscErrorCode ierr;
  PetscInt       myval;
  PetscBool      myset;
  PetscFunctionBegin;
  /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
  ierr = PetscOptionsInt_Private(PetscOptionsObject,opt,text,man,(PetscInt)currentvalue,&myval,&myset,lb,ub);CHKERRQ(ierr);
  if (myset) {ierr = PetscMUMPSIntCast(myval,value);CHKERRQ(ierr);}
  if (set) *set = myset;
  PetscFunctionReturn(0);
}
#define PetscOptionsMUMPSInt(a,b,c,d,e,f) PetscOptionsMUMPSInt_Private(PetscOptionsObject,a,b,c,d,e,f,PETSC_MUMPS_INT_MIN,PETSC_MUMPS_INT_MAX)
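/*
  For orientation only: a minimal sketch of how a user typically reaches this interface through
  PETSc's public factorization API. The names A, b, x and the ICNTL(7) value are illustrative,
  and error checking via ierr/CHKERRQ() is omitted for brevity:

      Mat           F;
      IS            rowperm,colperm;
      MatFactorInfo info;

      MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);
      MatMumpsSetIcntl(F,7,2);                          (optionally set MUMPS parameters before factorization)
      MatGetOrdering(A,MATORDERINGNATURAL,&rowperm,&colperm);
      MatFactorInfoInitialize(&info);
      MatLUFactorSymbolic(F,A,rowperm,colperm,&info);
      MatLUFactorNumeric(F,A,&info);
      MatSolve(F,b,x);                                  (dispatches to MatSolve_MUMPS() below)
*/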
/* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
#define PetscMUMPS_c(mumps) \
  do { \
    if (mumps->use_petsc_omp_support) { \
      if (mumps->is_omp_master) { \
        ierr = PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl);CHKERRQ(ierr); \
        MUMPS_c(&mumps->id); \
        ierr = PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl);CHKERRQ(ierr); \
      } \
      ierr = PetscOmpCtrlBarrier(mumps->omp_ctrl);CHKERRQ(ierr); \
      /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \
         to processes, so we only Bcast info[1], an error code and leave others (since they do not have \
         an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \
         omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \
      */ \
      ierr = MPI_Bcast(mumps->id.infog, 40,MPIU_MUMPSINT, 0,mumps->omp_comm);CHKERRQ(ierr); \
      ierr = MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,     0,mumps->omp_comm);CHKERRQ(ierr); \
      ierr = MPI_Bcast(mumps->id.info,  1, MPIU_MUMPSINT, 0,mumps->omp_comm);CHKERRQ(ierr); \
    } else { \
      MUMPS_c(&mumps->id); \
    } \
  } while (0)
#else
#define PetscMUMPS_c(mumps) \
  do { MUMPS_c(&mumps->id); } while (0)
#endif

/* declare MumpsScalar */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MumpsScalar mumps_complex
#else
#define MumpsScalar mumps_double_complex
#endif
#else
#define MumpsScalar PetscScalar
#endif

/* macros s.t. indices match MUMPS documentation */
#define ICNTL(I) icntl[(I)-1]
#define CNTL(I) cntl[(I)-1]
#define INFOG(I) infog[(I)-1]
#define INFO(I) info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I) rinfo[(I)-1]
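/* For example, mumps->id.ICNTL(24) used below denotes the MUMPS control parameter ICNTL(24) exactly as
   numbered (1-based) in the MUMPS documentation; it expands to the C array entry mumps->id.icntl[23] */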
typedef struct Mat_MUMPS Mat_MUMPS;
struct Mat_MUMPS {
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;
#else
  ZMUMPS_STRUC_C id;
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
#else
  DMUMPS_STRUC_C id;
#endif
#endif

  MatStructure   matstruc;
  PetscMPIInt    myid,petsc_size;
  PetscMUMPSInt  *irn,*jcn;          /* the (i,j,v) triplets passed to mumps. */
  PetscScalar    *val,*val_alloc;    /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
  PetscInt64     nnz;                /* number of nonzeros. The type is called selective 64-bit in mumps */
  PetscMUMPSInt  sym;
  MPI_Comm       mumps_comm;
  PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
  PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
  PetscMUMPSInt  lrhs_loc,nloc_rhs,*irhs_loc;
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  PetscInt       *rhs_nrow,max_nrhs;
  PetscMPIInt    *rhs_recvcounts,*rhs_disps;
  PetscScalar    *rhs_loc,*rhs_recvbuf;
#endif
  Vec            b_seq,x_seq;
  PetscInt       ninfo,*info;        /* which INFO to display */
  PetscInt       sizeredrhs;
  PetscScalar    *schur_sol;
  PetscInt       schur_sizesol;
  PetscMUMPSInt  *ia_alloc,*ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
  PetscInt64     cur_ilen,cur_jlen;   /* current len of ia_alloc[], ja_alloc[] */
  PetscErrorCode (*ConvertToTriples)(Mat,PetscInt,MatReuse,Mat_MUMPS*);

  /* stuff used by petsc/mumps OpenMP support */
  PetscBool      use_petsc_omp_support;
  PetscOmpCtrl   omp_ctrl;           /* an OpenMP controller with which blocked processes release their CPU (MPI_Barrier does not have this guarantee) */
  MPI_Comm       petsc_comm,omp_comm; /* petsc_comm is petsc matrix's comm */
  PetscInt64     *recvcount;         /* a collection of nnz on omp_master */
  PetscMPIInt    tag,omp_comm_size;
  PetscBool      is_omp_master;      /* is this rank the master of omp_comm? */
  MPI_Request    *reqs;
};

/* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
   Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
 */
static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps,PetscInt nrow,PetscInt *ia,PetscInt *ja,PetscMUMPSInt **ia_mumps,PetscMUMPSInt **ja_mumps,PetscMUMPSInt *nnz_mumps)
{
  PetscErrorCode ierr;
  PetscInt       nnz=ia[nrow]-1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */

  PetscFunctionBegin;
#if defined(PETSC_USE_64BIT_INDICES)
  {
    PetscInt i;
    if (nrow+1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
      ierr = PetscFree(mumps->ia_alloc);CHKERRQ(ierr);
      ierr = PetscMalloc1(nrow+1,&mumps->ia_alloc);CHKERRQ(ierr);
      mumps->cur_ilen = nrow+1;
    }
    if (nnz > mumps->cur_jlen) {
      ierr = PetscFree(mumps->ja_alloc);CHKERRQ(ierr);
      ierr = PetscMalloc1(nnz,&mumps->ja_alloc);CHKERRQ(ierr);
      mumps->cur_jlen = nnz;
    }
    for (i=0; i<nrow+1; i++) {ierr = PetscMUMPSIntCast(ia[i],&(mumps->ia_alloc[i]));CHKERRQ(ierr);}
    for (i=0; i<nnz; i++) {ierr = PetscMUMPSIntCast(ja[i],&(mumps->ja_alloc[i]));CHKERRQ(ierr);}
    *ia_mumps = mumps->ia_alloc;
    *ja_mumps = mumps->ja_alloc;
  }
#else
  *ia_mumps = ia;
  *ja_mumps = ja;
#endif
  ierr = PetscMUMPSIntCast(nnz,nnz_mumps);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr);
  ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
  ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
  mumps->id.size_schur = 0;
  mumps->id.schur_lld  = 0;
  mumps->id.ICNTL(19)  = 0;
  PetscFunctionReturn(0);
}
/* solve with rhs in mumps->id.redrhs and return in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS            *mumps=(Mat_MUMPS*)F->data;
  Mat                  S,B,X;
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;
  PetscErrorCode       ierr;

  PetscFunctionBegin;
  ierr = MatFactorFactorizeSchurComplement(F);CHKERRQ(ierr);
  ierr = MatFactorGetSchurComplement(F,&S,&schurstatus);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  ierr = MatBindToCPU(B,S->boundtocpu);CHKERRQ(ierr);
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);CHKERRQ(ierr);
    ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr);
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatMatSolveTranspose(S,B,X);CHKERRQ(ierr);
    } else {
      ierr = MatMatSolve(S,B,X);CHKERRQ(ierr);
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    sizesol = mumps->id.nrhs*mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
      ierr = PetscMalloc1(sizesol,&mumps->schur_sol);CHKERRQ(ierr);
      mumps->schur_sizesol = sizesol;
    }
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);CHKERRQ(ierr);
    ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr);
#endif
    ierr = MatProductCreateWithMat(S,B,NULL,X);CHKERRQ(ierr);
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatProductSetType(X,MATPRODUCT_AtB);CHKERRQ(ierr);
    } else {
      ierr = MatProductSetType(X,MATPRODUCT_AB);CHKERRQ(ierr);
    }
    ierr = MatProductSetFromOptions(X);CHKERRQ(ierr);
    ierr = MatProductSymbolic(X);CHKERRQ(ierr);
    ierr = MatProductNumeric(X);CHKERRQ(ierr);

    ierr = MatCopy(X,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %D",F->schur_status);
    break;
  }
  ierr = MatFactorRestoreSchurComplement(F,&S,schurstatus);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = MatDestroy(&X);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS     *mumps=(Mat_MUMPS*)F->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
    PetscFunctionReturn(0);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = mumps->id.size_schur;
      ierr = PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);CHKERRQ(ierr);
      mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs;
    }
    mumps->id.ICNTL(26) = 1; /* condensation phase */
  } else { /* prepare for the expansion step */
    /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
    ierr = MatMumpsSolveSchur_Private(F);CHKERRQ(ierr);
    mumps->id.ICNTL(26) = 2; /* expansion phase */
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
    if (mumps->id.nrhs > 1) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = 0;
      mumps->sizeredrhs = 0;
    }
  }
  PetscFunctionReturn(0);
}

/*
  MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz]

  input:
    A       - matrix in aij, baij or sbaij format
    shift   - 0: C style output triple; 1: Fortran style output triple.
    reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
              MAT_REUSE_MATRIX:   only the values in v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and col index, matrix values (matrix triples)

  The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
  freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means
  that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
353 354 */ 355 356 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 357 { 358 const PetscScalar *av; 359 const PetscInt *ai,*aj,*ajj,M=A->rmap->n; 360 PetscInt64 nz,rnz,i,j,k; 361 PetscErrorCode ierr; 362 PetscMUMPSInt *row,*col; 363 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data; 364 365 PetscFunctionBegin; 366 ierr = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr); 367 mumps->val = (PetscScalar*)av; 368 if (reuse == MAT_INITIAL_MATRIX) { 369 nz = aa->nz; 370 ai = aa->i; 371 aj = aa->j; 372 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 373 for (i=k=0; i<M; i++) { 374 rnz = ai[i+1] - ai[i]; 375 ajj = aj + ai[i]; 376 for (j=0; j<rnz; j++) { 377 ierr = PetscMUMPSIntCast(i+shift,&row[k]);CHKERRQ(ierr); 378 ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[k]);CHKERRQ(ierr); 379 k++; 380 } 381 } 382 mumps->irn = row; 383 mumps->jcn = col; 384 mumps->nnz = nz; 385 } 386 ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr); 387 PetscFunctionReturn(0); 388 } 389 390 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 391 { 392 PetscErrorCode ierr; 393 PetscInt64 nz,i,j,k,r; 394 Mat_SeqSELL *a=(Mat_SeqSELL*)A->data; 395 PetscMUMPSInt *row,*col; 396 397 PetscFunctionBegin; 398 mumps->val = a->val; 399 if (reuse == MAT_INITIAL_MATRIX) { 400 nz = a->sliidx[a->totalslices]; 401 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 402 for (i=k=0; i<a->totalslices; i++) { 403 for (j=a->sliidx[i],r=0; j<a->sliidx[i+1]; j++,r=((r+1)&0x07)) { 404 ierr = PetscMUMPSIntCast(8*i+r+shift,&row[k++]);CHKERRQ(ierr); 405 } 406 } 407 for (i=0;i<nz;i++) {ierr = PetscMUMPSIntCast(a->colidx[i]+shift,&col[i]);CHKERRQ(ierr);} 408 mumps->irn = row; 409 mumps->jcn = col; 410 mumps->nnz = nz; 411 } 412 PetscFunctionReturn(0); 413 } 414 415 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 416 { 417 Mat_SeqBAIJ *aa=(Mat_SeqBAIJ*)A->data; 418 const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2; 419 PetscInt64 M,nz,idx=0,rnz,i,j,k,m; 420 PetscInt bs; 421 PetscErrorCode ierr; 422 PetscMUMPSInt *row,*col; 423 424 PetscFunctionBegin; 425 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr); 426 M = A->rmap->N/bs; 427 mumps->val = aa->a; 428 if (reuse == MAT_INITIAL_MATRIX) { 429 ai = aa->i; aj = aa->j; 430 nz = bs2*aa->nz; 431 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 432 for (i=0; i<M; i++) { 433 ajj = aj + ai[i]; 434 rnz = ai[i+1] - ai[i]; 435 for (k=0; k<rnz; k++) { 436 for (j=0; j<bs; j++) { 437 for (m=0; m<bs; m++) { 438 ierr = PetscMUMPSIntCast(i*bs + m + shift,&row[idx]);CHKERRQ(ierr); 439 ierr = PetscMUMPSIntCast(bs*ajj[k] + j + shift,&col[idx]);CHKERRQ(ierr); 440 idx++; 441 } 442 } 443 } 444 } 445 mumps->irn = row; 446 mumps->jcn = col; 447 mumps->nnz = nz; 448 } 449 PetscFunctionReturn(0); 450 } 451 452 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 453 { 454 const PetscInt *ai, *aj,*ajj; 455 PetscInt bs; 456 PetscInt64 nz,rnz,i,j,k,m; 457 PetscErrorCode ierr; 458 PetscMUMPSInt *row,*col; 459 PetscScalar *val; 460 Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)A->data; 461 const PetscInt bs2=aa->bs2,mbs=aa->mbs; 462 #if defined(PETSC_USE_COMPLEX) 463 PetscBool hermitian; 464 #endif 465 466 PetscFunctionBegin; 467 #if defined(PETSC_USE_COMPLEX) 468 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr); 469 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for 
Cholesky");
#endif
  ai = aa->i;
  aj = aa->j;
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz;
    ierr = PetscMalloc2(bs2*nz,&row,bs2*nz,&col);CHKERRQ(ierr);
    if (bs>1) {
      ierr = PetscMalloc1(bs2*nz,&mumps->val_alloc);CHKERRQ(ierr);
      mumps->val = mumps->val_alloc;
    } else {
      mumps->val = aa->a;
    }
    mumps->irn = row;
    mumps->jcn = col;
  } else {
    if (bs == 1) mumps->val = aa->a;
    row = mumps->irn;
    col = mumps->jcn;
  }
  val = mumps->val;

  nz = 0;
  if (bs>1) {
    for (i=0; i<mbs; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            if (ajj[j]>i || k>=m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                ierr = PetscMUMPSIntCast(i*bs + m + shift,&row[nz]);CHKERRQ(ierr);
                ierr = PetscMUMPSIntCast(ajj[j]*bs + k + shift,&col[nz]);CHKERRQ(ierr);
              }
              val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs];
            }
          }
        }
      }
    }
  } else if (reuse == MAT_INITIAL_MATRIX) {
    for (i=0; i<mbs; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
        ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
        nz++;
      }
    }
    if (nz != aa->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %D != %D",nz,aa->nz);
  }
  if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
  PetscFunctionReturn(0);
}
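/*
  A small worked example (illustration only, not used by the code): for the 2x2 symmetric AIJ matrix
      [a  b]
      [b  c]
  the routine below keeps only the upper triangular entries (including the diagonal), so with shift=1
  (the Fortran-style indexing MUMPS expects) the triples passed to MUMPS are
      (row,col,val) = (1,1,a), (1,2,b), (2,2,c)
  and mumps->nnz = 3.
*/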
PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai,*aj,*ajj,*adiag,M=A->rmap->n;
  PetscInt64        nz,rnz,i,j;
  const PetscScalar *av,*v1;
  PetscScalar       *val;
  PetscErrorCode    ierr;
  PetscMUMPSInt     *row,*col;
  Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
  PetscBool         missing;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
  if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  ierr  = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr);
  ai    = aa->i; aj = aa->j;
  adiag = aa->diag;
  ierr  = MatMissingDiagonal_SeqAIJ(A,&missing,NULL);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    /* count nz in the upper triangular part of A */
    nz = 0;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            nz++;
          }
        } else {
          nz += ai[i+1] - adiag[i];
        }
      }
    } else {
      for (i=0; i<M; i++) nz += ai[i+1] - adiag[i];
    }
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;

    nz = 0;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
            ierr = PetscMUMPSIntCast(aj[j]+shift,&col[nz]);CHKERRQ(ierr);
            val[nz] = av[j];
            nz++;
          }
        } else {
          rnz = ai[i+1] - adiag[i];
          ajj = aj + adiag[i];
          v1  = av + adiag[i];
          for (j=0; j<rnz; j++) {
            ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
            ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i=0; i<M; i++) {
        rnz = ai[i+1] - adiag[i];
        ajj = aj + adiag[i];
        v1  = av + adiag[i];
        for (j=0; j<rnz; j++) {
          ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
          ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
          val[nz++] = v1[j];
        }
      }
    }
  } else {
    nz  = 0;
    val = mumps->val;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            val[nz++] = av[j];
          }
        } else {
          rnz = ai[i+1] - adiag[i];
          v1  = av + adiag[i];
          for (j=0; j<rnz; j++) {
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i=0; i<M; i++) {
        rnz = ai[i+1] - adiag[i];
        v1  = av + adiag[i];
        for (j=0; j<rnz; j++) {
          val[nz++] = v1[j];
        }
      }
    }
  }
  ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  PetscErrorCode    ierr;
  const PetscInt    *ai,*aj,*bi,*bj,*garray,*ajj,*bjj;
  PetscInt          bs;
  PetscInt64        rstart,nz,i,j,k,m,jj,irow,countA,countB;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av,*bv,*v1,*v2;
  PetscScalar       *val;
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ*)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
  const PetscInt    bs2=aa->bs2,mbs=aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
  if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  ierr   = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  rstart = A->rmap->rstart;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;
  av = aa->a;
  bv = bb->a;

  garray = mat->garray;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = (aa->nz+bb->nz)*bs2; /* just a conservative estimate */
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
    /* can not decide the exact mumps->nnz now because of the SBAIJ */
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<mbs; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i]*bs2;
    v2     = bv + bi[i]*bs2;

    if (bs>1) {
      /* A-part */
      for (j=0; j<countA; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            if (rstart + ajj[j]*bs>irow || k>=m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                ierr = PetscMUMPSIntCast(irow + m + shift,&row[jj]);CHKERRQ(ierr);
                ierr = PetscMUMPSIntCast(rstart + ajj[j]*bs + k + shift,&col[jj]);CHKERRQ(ierr);
              }
              val[jj++] = v1[j*bs2 + m + k*bs];
            }
          }
        }
      }

      /* B-part */
      for (j=0; j < countB; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            if (reuse == MAT_INITIAL_MATRIX) {
              ierr = PetscMUMPSIntCast(irow + m + shift,&row[jj]);CHKERRQ(ierr);
              ierr =
PetscMUMPSIntCast(garray[bjj[j]]*bs + k + shift,&col[jj]);CHKERRQ(ierr); 717 } 718 val[jj++] = v2[j*bs2 + m + k*bs]; 719 } 720 } 721 } 722 } else { 723 /* A-part */ 724 for (j=0; j<countA; j++) { 725 if (reuse == MAT_INITIAL_MATRIX) { 726 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 727 ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr); 728 } 729 val[jj++] = v1[j]; 730 } 731 732 /* B-part */ 733 for (j=0; j < countB; j++) { 734 if (reuse == MAT_INITIAL_MATRIX) { 735 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 736 ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr); 737 } 738 val[jj++] = v2[j]; 739 } 740 } 741 irow+=bs; 742 } 743 mumps->nnz = jj; 744 PetscFunctionReturn(0); 745 } 746 747 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 748 { 749 const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj; 750 PetscErrorCode ierr; 751 PetscInt64 rstart,nz,i,j,jj,irow,countA,countB; 752 PetscMUMPSInt *row,*col; 753 const PetscScalar *av, *bv,*v1,*v2; 754 PetscScalar *val; 755 Mat Ad,Ao; 756 Mat_SeqAIJ *aa; 757 Mat_SeqAIJ *bb; 758 759 PetscFunctionBegin; 760 ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr); 761 ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr); 762 ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr); 763 764 aa = (Mat_SeqAIJ*)(Ad)->data; 765 bb = (Mat_SeqAIJ*)(Ao)->data; 766 ai = aa->i; 767 aj = aa->j; 768 bi = bb->i; 769 bj = bb->j; 770 771 rstart = A->rmap->rstart; 772 773 if (reuse == MAT_INITIAL_MATRIX) { 774 nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */ 775 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr); 776 ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr); 777 mumps->nnz = nz; 778 mumps->irn = row; 779 mumps->jcn = col; 780 mumps->val = mumps->val_alloc = val; 781 } else { 782 val = mumps->val; 783 } 784 785 jj = 0; irow = rstart; 786 for (i=0; i<m; i++) { 787 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 788 countA = ai[i+1] - ai[i]; 789 countB = bi[i+1] - bi[i]; 790 bjj = bj + bi[i]; 791 v1 = av + ai[i]; 792 v2 = bv + bi[i]; 793 794 /* A-part */ 795 for (j=0; j<countA; j++) { 796 if (reuse == MAT_INITIAL_MATRIX) { 797 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 798 ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr); 799 } 800 val[jj++] = v1[j]; 801 } 802 803 /* B-part */ 804 for (j=0; j < countB; j++) { 805 if (reuse == MAT_INITIAL_MATRIX) { 806 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 807 ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr); 808 } 809 val[jj++] = v2[j]; 810 } 811 irow++; 812 } 813 ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr); 814 ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr); 815 PetscFunctionReturn(0); 816 } 817 818 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps) 819 { 820 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)A->data; 821 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ*)(mat->A)->data; 822 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data; 823 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj; 824 const PetscInt *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart; 825 const PetscInt bs2=mat->bs2; 826 PetscErrorCode ierr; 827 PetscInt bs; 828 PetscInt64 nz,i,j,k,n,jj,irow,countA,countB,idx; 829 PetscMUMPSInt *row,*col; 830 const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2; 831 PetscScalar *val; 832 
  PetscFunctionBegin;
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = bs2*(aa->nz + bb->nz);
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<mbs; i++) {
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    ajj    = aj + ai[i];
    bjj    = bj + bi[i];
    v1     = av + bs2*ai[i];
    v2     = bv + bs2*bi[i];

    idx = 0;
    /* A-part */
    for (k=0; k<countA; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            ierr = PetscMUMPSIntCast(irow + n + shift,&row[jj]);CHKERRQ(ierr);
            ierr = PetscMUMPSIntCast(rstart + bs*ajj[k] + j + shift,&col[jj]);CHKERRQ(ierr);
          }
          val[jj++] = v1[idx++];
        }
      }
    }

    idx = 0;
    /* B-part */
    for (k=0; k<countB; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            ierr = PetscMUMPSIntCast(irow + n + shift,&row[jj]);CHKERRQ(ierr);
            ierr = PetscMUMPSIntCast(bs*garray[bjj[k]] + j + shift,&col[jj]);CHKERRQ(ierr);
          }
          val[jj++] = v2[idx++];
        }
      }
    }
    irow += bs;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt64        rstart,nz,nza,nzb,i,j,jj,irow,countA,countB;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av, *bv,*v1,*v2;
  PetscScalar       *val;
  Mat               Ad,Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
  if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr);

  aa = (Mat_SeqAIJ*)(Ad)->data;
  bb = (Mat_SeqAIJ*)(Ao)->data;
  ai = aa->i;
  aj = aa->j;
  adiag = aa->diag;
  bi = bb->i;
  bj = bb->j;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nza = 0;    /* num of upper triangular entries in mat->A, including diagonals */
    nzb = 0;    /* num of upper triangular entries in mat->B */
    for (i=0; i<m; i++) {
      nza   += (ai[i+1] - adiag[i]);
      countB = bi[i+1] - bi[i];
      bjj    = bj + bi[i];
      for (j=0; j<countB; j++) {
        if (garray[bjj[j]] > rstart) nzb++;
      }
    }

    nz   = nza + nzb; /* total nz of upper triangular part of mat */
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
    v1     = av + adiag[i];
    countA = ai[i+1] - adiag[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0;
j<countA; j++) { 956 if (reuse == MAT_INITIAL_MATRIX) { 957 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 958 ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr); 959 } 960 val[jj++] = v1[j]; 961 } 962 963 /* B-part */ 964 for (j=0; j < countB; j++) { 965 if (garray[bjj[j]] > rstart) { 966 if (reuse == MAT_INITIAL_MATRIX) { 967 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr); 968 ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr); 969 } 970 val[jj++] = v2[j]; 971 } 972 } 973 irow++; 974 } 975 ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr); 976 ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr); 977 PetscFunctionReturn(0); 978 } 979 980 PetscErrorCode MatDestroy_MUMPS(Mat A) 981 { 982 PetscErrorCode ierr; 983 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 984 985 PetscFunctionBegin; 986 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 987 ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr); 988 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 989 ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr); 990 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 991 ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr); 992 ierr = PetscFree2(mumps->irn,mumps->jcn);CHKERRQ(ierr); 993 ierr = PetscFree(mumps->val_alloc);CHKERRQ(ierr); 994 ierr = PetscFree(mumps->info);CHKERRQ(ierr); 995 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 996 mumps->id.job = JOB_END; 997 PetscMUMPS_c(mumps); 998 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1)); 999 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1000 if (mumps->use_petsc_omp_support) { 1001 ierr = PetscOmpCtrlDestroy(&mumps->omp_ctrl);CHKERRQ(ierr); 1002 ierr = PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);CHKERRQ(ierr); 1003 ierr = PetscFree3(mumps->rhs_nrow,mumps->rhs_recvcounts,mumps->rhs_disps);CHKERRQ(ierr); 1004 } 1005 #endif 1006 ierr = PetscFree(mumps->ia_alloc);CHKERRQ(ierr); 1007 ierr = PetscFree(mumps->ja_alloc);CHKERRQ(ierr); 1008 ierr = PetscFree(mumps->recvcount);CHKERRQ(ierr); 1009 ierr = PetscFree(mumps->reqs);CHKERRQ(ierr); 1010 ierr = PetscFree(mumps->irhs_loc);CHKERRQ(ierr); 1011 if (mumps->mumps_comm != MPI_COMM_NULL) {ierr = MPI_Comm_free(&mumps->mumps_comm);CHKERRQ(ierr);} 1012 ierr = PetscFree(A->data);CHKERRQ(ierr); 1013 1014 /* clear composed functions */ 1015 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr); 1016 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr); 1017 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr); 1018 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr); 1019 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);CHKERRQ(ierr); 1020 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr); 1021 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);CHKERRQ(ierr); 1022 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);CHKERRQ(ierr); 1023 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);CHKERRQ(ierr); 1024 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);CHKERRQ(ierr); 1025 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);CHKERRQ(ierr); 1026 ierr = 
PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);CHKERRQ(ierr); 1027 ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);CHKERRQ(ierr); 1028 PetscFunctionReturn(0); 1029 } 1030 1031 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */ 1032 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A,PetscInt nrhs,const PetscScalar *array) 1033 { 1034 PetscErrorCode ierr; 1035 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1036 const PetscMPIInt ompsize=mumps->omp_comm_size; 1037 PetscInt i,m,M,rstart; 1038 1039 PetscFunctionBegin; 1040 ierr = MatGetSize(A,&M,NULL);CHKERRQ(ierr); 1041 ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr); 1042 if (M > PETSC_MUMPS_INT_MAX) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt"); 1043 if (ompsize == 1) { 1044 if (!mumps->irhs_loc) { 1045 mumps->nloc_rhs = m; 1046 ierr = PetscMalloc1(m,&mumps->irhs_loc);CHKERRQ(ierr); 1047 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 1048 for (i=0; i<m; i++) mumps->irhs_loc[i] = rstart+i+1; /* use 1-based indices */ 1049 } 1050 mumps->id.rhs_loc = (MumpsScalar*)array; 1051 } else { 1052 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1053 const PetscInt *ranges; 1054 PetscMPIInt j,k,sendcount,*petsc_ranks,*omp_ranks; 1055 MPI_Group petsc_group,omp_group; 1056 PetscScalar *recvbuf=NULL; 1057 1058 if (mumps->is_omp_master) { 1059 /* Lazily initialize the omp stuff for distributed rhs */ 1060 if (!mumps->irhs_loc) { 1061 ierr = PetscMalloc2(ompsize,&omp_ranks,ompsize,&petsc_ranks);CHKERRQ(ierr); 1062 ierr = PetscMalloc3(ompsize,&mumps->rhs_nrow,ompsize,&mumps->rhs_recvcounts,ompsize,&mumps->rhs_disps);CHKERRQ(ierr); 1063 ierr = MPI_Comm_group(mumps->petsc_comm,&petsc_group);CHKERRQ(ierr); 1064 ierr = MPI_Comm_group(mumps->omp_comm,&omp_group);CHKERRQ(ierr); 1065 for (j=0; j<ompsize; j++) omp_ranks[j] = j; 1066 ierr = MPI_Group_translate_ranks(omp_group,ompsize,omp_ranks,petsc_group,petsc_ranks);CHKERRQ(ierr); 1067 1068 /* Populate mumps->irhs_loc[], rhs_nrow[] */ 1069 mumps->nloc_rhs = 0; 1070 ierr = MatGetOwnershipRanges(A,&ranges);CHKERRQ(ierr); 1071 for (j=0; j<ompsize; j++) { 1072 mumps->rhs_nrow[j] = ranges[petsc_ranks[j]+1] - ranges[petsc_ranks[j]]; 1073 mumps->nloc_rhs += mumps->rhs_nrow[j]; 1074 } 1075 ierr = PetscMalloc1(mumps->nloc_rhs,&mumps->irhs_loc);CHKERRQ(ierr); 1076 for (j=k=0; j<ompsize; j++) { 1077 for (i=ranges[petsc_ranks[j]]; i<ranges[petsc_ranks[j]+1]; i++,k++) mumps->irhs_loc[k] = i+1; /* uses 1-based indices */ 1078 } 1079 1080 ierr = PetscFree2(omp_ranks,petsc_ranks);CHKERRQ(ierr); 1081 ierr = MPI_Group_free(&petsc_group);CHKERRQ(ierr); 1082 ierr = MPI_Group_free(&omp_group);CHKERRQ(ierr); 1083 } 1084 1085 /* Realloc buffers when current nrhs is bigger than what we have met */ 1086 if (nrhs > mumps->max_nrhs) { 1087 ierr = PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);CHKERRQ(ierr); 1088 ierr = PetscMalloc2(mumps->nloc_rhs*nrhs,&mumps->rhs_loc,mumps->nloc_rhs*nrhs,&mumps->rhs_recvbuf);CHKERRQ(ierr); 1089 mumps->max_nrhs = nrhs; 1090 } 1091 1092 /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */ 1093 for (j=0; j<ompsize; j++) {ierr = PetscMPIIntCast(mumps->rhs_nrow[j]*nrhs,&mumps->rhs_recvcounts[j]);CHKERRQ(ierr);} 1094 mumps->rhs_disps[0] = 0; 1095 for (j=1; j<ompsize; j++) { 1096 mumps->rhs_disps[j] = mumps->rhs_disps[j-1] + mumps->rhs_recvcounts[j-1]; 1097 if (mumps->rhs_disps[j] < 0) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscMPIInt overflow!"); 1098 } 1099 recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */ 1100 } 1101 1102 ierr = PetscMPIIntCast(m*nrhs,&sendcount);CHKERRQ(ierr); 1103 ierr = MPI_Gatherv(array,sendcount,MPIU_SCALAR,recvbuf,mumps->rhs_recvcounts,mumps->rhs_disps,MPIU_SCALAR,0,mumps->omp_comm);CHKERRQ(ierr); 1104 1105 if (mumps->is_omp_master) { 1106 if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */ 1107 PetscScalar *dst,*dstbase = mumps->rhs_loc; 1108 for (j=0; j<ompsize; j++) { 1109 const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j]; 1110 dst = dstbase; 1111 for (i=0; i<nrhs; i++) { 1112 ierr = PetscArraycpy(dst,src,mumps->rhs_nrow[j]);CHKERRQ(ierr); 1113 src += mumps->rhs_nrow[j]; 1114 dst += mumps->nloc_rhs; 1115 } 1116 dstbase += mumps->rhs_nrow[j]; 1117 } 1118 } 1119 mumps->id.rhs_loc = (MumpsScalar*)mumps->rhs_loc; 1120 } 1121 #endif /* PETSC_HAVE_OPENMP_SUPPORT */ 1122 } 1123 mumps->id.nrhs = nrhs; 1124 mumps->id.nloc_rhs = mumps->nloc_rhs; 1125 mumps->id.lrhs_loc = mumps->nloc_rhs; 1126 mumps->id.irhs_loc = mumps->irhs_loc; 1127 PetscFunctionReturn(0); 1128 } 1129 1130 PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x) 1131 { 1132 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1133 const PetscScalar *rarray = NULL; 1134 PetscScalar *array; 1135 IS is_iden,is_petsc; 1136 PetscErrorCode ierr; 1137 PetscInt i; 1138 PetscBool second_solve = PETSC_FALSE; 1139 static PetscBool cite1 = PETSC_FALSE,cite2 = PETSC_FALSE; 1140 1141 PetscFunctionBegin; 1142 ierr = PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1);CHKERRQ(ierr); 1143 ierr = PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. 
Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",&cite2);CHKERRQ(ierr);

  if (A->factorerrortype) {
    ierr = PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    ierr = VecSetInf(x);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  mumps->id.nrhs = 1;
  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      ierr = VecGetArrayRead(b,&rarray);CHKERRQ(ierr);
      ierr = MatMumpsSetUpDistRHSInfo(A,1,rarray);CHKERRQ(ierr);
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS; scatter b into a sequential rhs vector */
      ierr = VecScatterBegin(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      if (!mumps->myid) {
        ierr = VecGetArray(mumps->b_seq,&array);CHKERRQ(ierr);
        mumps->id.rhs = (MumpsScalar*)array;
      }
    }
  } else {  /* petsc_size == 1 */
    mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
    ierr = VecCopy(b,x);CHKERRQ(ierr);
    ierr = VecGetArray(x,&array);CHKERRQ(ierr);
    mumps->id.rhs = (MumpsScalar*)array;
  }

  /*
     handle condensation step of Schur complement (if any)
     We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
     According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase
     Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
1178 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 1179 */ 1180 if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) { 1181 if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n"); 1182 second_solve = PETSC_TRUE; 1183 ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr); 1184 } 1185 /* solve phase */ 1186 /*-------------*/ 1187 mumps->id.job = JOB_SOLVE; 1188 PetscMUMPS_c(mumps); 1189 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1190 1191 /* handle expansion step of Schur complement (if any) */ 1192 if (second_solve) { 1193 ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr); 1194 } 1195 1196 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */ 1197 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 1198 /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 1199 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 1200 } 1201 if (!mumps->scat_sol) { /* create scatter scat_sol */ 1202 PetscInt *isol2_loc=NULL; 1203 ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */ 1204 ierr = PetscMalloc1(mumps->id.lsol_loc,&isol2_loc);CHKERRQ(ierr); 1205 for (i=0; i<mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i]-1; /* change Fortran style to C style */ 1206 ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,isol2_loc,PETSC_OWN_POINTER,&is_petsc);CHKERRQ(ierr); /* to */ 1207 ierr = VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr); 1208 ierr = ISDestroy(&is_iden);CHKERRQ(ierr); 1209 ierr = ISDestroy(&is_petsc);CHKERRQ(ierr); 1210 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 1211 } 1212 1213 ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1214 ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1215 } 1216 1217 if (mumps->petsc_size > 1) { 1218 if (mumps->ICNTL20 == 10) { 1219 ierr = VecRestoreArrayRead(b,&rarray);CHKERRQ(ierr); 1220 } else if (!mumps->myid) { 1221 ierr = VecRestoreArray(mumps->b_seq,&array);CHKERRQ(ierr); 1222 } 1223 } else {ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);} 1224 1225 ierr = PetscLogFlops(2.0*mumps->id.RINFO(3));CHKERRQ(ierr); 1226 PetscFunctionReturn(0); 1227 } 1228 1229 PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x) 1230 { 1231 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1232 PetscErrorCode ierr; 1233 1234 PetscFunctionBegin; 1235 mumps->id.ICNTL(9) = 0; 1236 ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr); 1237 mumps->id.ICNTL(9) = 1; 1238 PetscFunctionReturn(0); 1239 } 1240 1241 PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X) 1242 { 1243 PetscErrorCode ierr; 1244 Mat Bt = NULL; 1245 PetscBool denseX,denseB,flg,flgT; 1246 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 1247 PetscInt i,nrhs,M; 1248 PetscScalar *array; 1249 const PetscScalar *rbray; 1250 PetscInt lsol_loc,nlsol_loc,*idxx,iidx = 0; 1251 PetscMUMPSInt *isol_loc,*isol_loc_save; 1252 PetscScalar *bray,*sol_loc,*sol_loc_save; 1253 IS is_to,is_from; 1254 PetscInt k,proc,j,m,myrstart; 1255 const PetscInt *rstart; 1256 Vec v_mpi,msol_loc; 1257 VecScatter scat_sol; 1258 Vec b_seq; 1259 VecScatter scat_rhs; 1260 
  PetscScalar        *aa;
  PetscInt           spnr,*ia,*ja;
  Mat_MPIAIJ         *b = NULL;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompareAny((PetscObject)X,&denseX,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (!denseX) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");

  ierr = PetscObjectTypeCompareAny((PetscObject)B,&denseB,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (denseB) {
    if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution");
    mumps->id.ICNTL(20)= 0; /* dense RHS */
  } else { /* sparse B */
    if (X == B) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_IDN,"X and B must be different matrices");
    ierr = PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);CHKERRQ(ierr);
    if (flgT) { /* input B is transpose of actual RHS matrix,
                   because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
      ierr = MatTransposeGetMat(B,&Bt);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix");
    mumps->id.ICNTL(20)= 1; /* sparse RHS */
  }

  ierr = MatGetSize(B,&M,&nrhs);CHKERRQ(ierr);
  mumps->id.nrhs = nrhs;
  mumps->id.lrhs = M;
  mumps->id.rhs  = NULL;

  if (mumps->petsc_size == 1) {
    PetscScalar *aa;
    PetscInt    spnr,*ia,*ja;
    PetscBool   second_solve = PETSC_FALSE;

    ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
    mumps->id.rhs = (MumpsScalar*)array;

    if (denseB) {
      /* copy B to X */
      ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr);
      ierr = PetscArraycpy(array,rbray,M*nrhs);CHKERRQ(ierr);
      ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr);
    } else { /* sparse B */
      ierr = MatSeqAIJGetArray(Bt,&aa);CHKERRQ(ierr);
      ierr = MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr);
      mumps->id.rhs_sparse = (MumpsScalar*)aa;
    }
    /* handle condensation step of Schur complement (if any) */
    if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
      second_solve = PETSC_TRUE;
      ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
    }
    /* solve phase */
    /*-------------*/
    mumps->id.job = JOB_SOLVE;
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

    /* handle expansion step of Schur complement (if any) */
    if (second_solve) {
      ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
    }
    if (!denseB) { /* sparse B */
      ierr = MatSeqAIJRestoreArray(Bt,&aa);CHKERRQ(ierr);
      ierr = MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
    ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /*--------- parallel case: MUMPS requires rhs B to be centralized on the host!
--------*/ 1332 if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n"); 1333 1334 /* create msol_loc to hold mumps local solution */ 1335 isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */ 1336 sol_loc_save = (PetscScalar*)mumps->id.sol_loc; 1337 1338 lsol_loc = mumps->id.lsol_loc; 1339 nlsol_loc = nrhs*lsol_loc; /* length of sol_loc */ 1340 ierr = PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);CHKERRQ(ierr); 1341 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1342 mumps->id.isol_loc = isol_loc; 1343 1344 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);CHKERRQ(ierr); 1345 1346 if (denseB) { 1347 if (mumps->ICNTL20 == 10) { 1348 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 1349 ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr); 1350 ierr = MatMumpsSetUpDistRHSInfo(A,nrhs,rbray);CHKERRQ(ierr); 1351 ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr); 1352 ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr); 1353 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,NULL,&v_mpi);CHKERRQ(ierr); 1354 } else { 1355 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 1356 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 1357 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 1358 0, re-arrange B into desired order, which is a local operation. 1359 */ 1360 1361 /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 1362 /* wrap dense rhs matrix B into a vector v_mpi */ 1363 ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr); 1364 ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr); 1365 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr); 1366 ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr); 1367 1368 /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! 
*/ 1369 if (!mumps->myid) { 1370 PetscInt *idx; 1371 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 1372 ierr = PetscMalloc1(nrhs*M,&idx);CHKERRQ(ierr); 1373 ierr = MatGetOwnershipRanges(B,&rstart);CHKERRQ(ierr); 1374 k = 0; 1375 for (proc=0; proc<mumps->petsc_size; proc++){ 1376 for (j=0; j<nrhs; j++){ 1377 for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i; 1378 } 1379 } 1380 1381 ierr = VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);CHKERRQ(ierr); 1382 ierr = ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr); 1383 ierr = ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);CHKERRQ(ierr); 1384 } else { 1385 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);CHKERRQ(ierr); 1386 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);CHKERRQ(ierr); 1387 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);CHKERRQ(ierr); 1388 } 1389 ierr = VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);CHKERRQ(ierr); 1390 ierr = VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1391 ierr = ISDestroy(&is_to);CHKERRQ(ierr); 1392 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1393 ierr = VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1394 1395 if (!mumps->myid) { /* define rhs on the host */ 1396 ierr = VecGetArray(b_seq,&bray);CHKERRQ(ierr); 1397 mumps->id.rhs = (MumpsScalar*)bray; 1398 ierr = VecRestoreArray(b_seq,&bray);CHKERRQ(ierr); 1399 } 1400 } 1401 } else { /* sparse B */ 1402 b = (Mat_MPIAIJ*)Bt->data; 1403 1404 /* wrap dense X into a vector v_mpi */ 1405 ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr); 1406 ierr = MatDenseGetArray(X,&bray);CHKERRQ(ierr); 1407 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr); 1408 ierr = MatDenseRestoreArray(X,&bray);CHKERRQ(ierr); 1409 1410 if (!mumps->myid) { 1411 ierr = MatSeqAIJGetArray(b->A,&aa);CHKERRQ(ierr); 1412 ierr = MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1413 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 1414 ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr); 1415 mumps->id.rhs_sparse = (MumpsScalar*)aa; 1416 } else { 1417 mumps->id.irhs_ptr = NULL; 1418 mumps->id.irhs_sparse = NULL; 1419 mumps->id.nz_rhs = 0; 1420 mumps->id.rhs_sparse = NULL; 1421 } 1422 } 1423 1424 /* solve phase */ 1425 /*-------------*/ 1426 mumps->id.job = JOB_SOLVE; 1427 PetscMUMPS_c(mumps); 1428 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1429 1430 /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */ 1431 ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr); 1432 ierr = VecPlaceArray(v_mpi,array);CHKERRQ(ierr); 1433 1434 /* create scatter scat_sol */ 1435 ierr = MatGetOwnershipRanges(X,&rstart);CHKERRQ(ierr); 1436 /* iidx: index for scatter mumps solution to petsc X */ 1437 1438 ierr = ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);CHKERRQ(ierr); 1439 ierr = PetscMalloc1(nlsol_loc,&idxx);CHKERRQ(ierr); 1440 for (i=0; i<lsol_loc; i++) { 1441 isol_loc[i] -= 1; /* change Fortran style to C style. 
isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 1442 1443 for (proc=0; proc<mumps->petsc_size; proc++){ 1444 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) { 1445 myrstart = rstart[proc]; 1446 k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */ 1447 iidx = k + myrstart*nrhs; /* maps mumps isol_loc[i] to petsc index in X */ 1448 m = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */ 1449 break; 1450 } 1451 } 1452 1453 for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m; 1454 } 1455 ierr = ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr); 1456 ierr = VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);CHKERRQ(ierr); 1457 ierr = VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1458 ierr = ISDestroy(&is_from);CHKERRQ(ierr); 1459 ierr = ISDestroy(&is_to);CHKERRQ(ierr); 1460 ierr = VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1461 ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr); 1462 1463 /* free spaces */ 1464 mumps->id.sol_loc = (MumpsScalar*)sol_loc_save; 1465 mumps->id.isol_loc = isol_loc_save; 1466 1467 ierr = PetscFree2(sol_loc,isol_loc);CHKERRQ(ierr); 1468 ierr = PetscFree(idxx);CHKERRQ(ierr); 1469 ierr = VecDestroy(&msol_loc);CHKERRQ(ierr); 1470 ierr = VecDestroy(&v_mpi);CHKERRQ(ierr); 1471 if (!denseB) { 1472 if (!mumps->myid) { 1473 b = (Mat_MPIAIJ*)Bt->data; 1474 ierr = MatSeqAIJRestoreArray(b->A,&aa);CHKERRQ(ierr); 1475 ierr = MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 1476 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure"); 1477 } 1478 } else { 1479 if (mumps->ICNTL20 == 0) { 1480 ierr = VecDestroy(&b_seq);CHKERRQ(ierr); 1481 ierr = VecScatterDestroy(&scat_rhs);CHKERRQ(ierr); 1482 } 1483 } 1484 ierr = VecScatterDestroy(&scat_sol);CHKERRQ(ierr); 1485 ierr = PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));CHKERRQ(ierr); 1486 PetscFunctionReturn(0); 1487 } 1488 1489 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X) 1490 { 1491 PetscErrorCode ierr; 1492 PetscBool flg; 1493 Mat B; 1494 1495 PetscFunctionBegin; 1496 ierr = PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 1497 if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix"); 1498 1499 /* Create B=Bt^T that uses Bt's data structure */ 1500 ierr = MatCreateTranspose(Bt,&B);CHKERRQ(ierr); 1501 1502 ierr = MatMatSolve_MUMPS(A,B,X);CHKERRQ(ierr); 1503 ierr = MatDestroy(&B);CHKERRQ(ierr); 1504 PetscFunctionReturn(0); 1505 } 1506 1507 #if !defined(PETSC_USE_COMPLEX) 1508 /* 1509 input: 1510 F: numeric factor 1511 output: 1512 nneg: total number of negative pivots 1513 nzero: total number of zero pivots 1514 npos: (global dimension of F) - nneg - nzero 1515 */ 1516 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos) 1517 { 1518 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 1519 PetscErrorCode ierr; 1520 PetscMPIInt size; 1521 1522 PetscFunctionBegin; 1523 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRQ(ierr); 1524 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 1525 if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. 
-mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",mumps->id.INFOG(13)); 1526 1527 if (nneg) *nneg = mumps->id.INFOG(12); 1528 if (nzero || npos) { 1529 if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 1530 if (nzero) *nzero = mumps->id.INFOG(28); 1531 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 1532 } 1533 PetscFunctionReturn(0); 1534 } 1535 #endif 1536 1537 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps) 1538 { 1539 PetscErrorCode ierr; 1540 PetscInt i,nreqs; 1541 PetscMUMPSInt *irn,*jcn; 1542 PetscMPIInt count; 1543 PetscInt64 totnnz,remain; 1544 const PetscInt osize=mumps->omp_comm_size; 1545 PetscScalar *val; 1546 1547 PetscFunctionBegin; 1548 if (osize > 1) { 1549 if (reuse == MAT_INITIAL_MATRIX) { 1550 /* master first gathers counts of nonzeros to receive */ 1551 if (mumps->is_omp_master) {ierr = PetscMalloc1(osize,&mumps->recvcount);CHKERRQ(ierr);} 1552 ierr = MPI_Gather(&mumps->nnz,1,MPIU_INT64,mumps->recvcount,1,MPIU_INT64,0/*master*/,mumps->omp_comm);CHKERRQ(ierr); 1553 1554 /* Then each computes number of send/recvs */ 1555 if (mumps->is_omp_master) { 1556 /* Start from 1 since self communication is not done in MPI */ 1557 nreqs = 0; 1558 for (i=1; i<osize; i++) nreqs += (mumps->recvcount[i]+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX; 1559 } else { 1560 nreqs = (mumps->nnz+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX; 1561 } 1562 ierr = PetscMalloc1(nreqs*3,&mumps->reqs);CHKERRQ(ierr); /* Triple the requests since we send irn, jcn and val seperately */ 1563 1564 /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 1565 MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 1566 might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size 1567 is very small, the current approach should have no extra overhead compared to MPI_Gatherv. 
1568 */ 1569 nreqs = 0; /* counter for actual send/recvs */ 1570 if (mumps->is_omp_master) { 1571 for (i=0,totnnz=0; i<osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 1572 ierr = PetscMalloc2(totnnz,&irn,totnnz,&jcn);CHKERRQ(ierr); 1573 ierr = PetscMalloc1(totnnz,&val);CHKERRQ(ierr); 1574 1575 /* Self communication */ 1576 ierr = PetscArraycpy(irn,mumps->irn,mumps->nnz);CHKERRQ(ierr); 1577 ierr = PetscArraycpy(jcn,mumps->jcn,mumps->nnz);CHKERRQ(ierr); 1578 ierr = PetscArraycpy(val,mumps->val,mumps->nnz);CHKERRQ(ierr); 1579 1580 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 1581 ierr = PetscFree2(mumps->irn,mumps->jcn);CHKERRQ(ierr); 1582 ierr = PetscFree(mumps->val_alloc);CHKERRQ(ierr); 1583 mumps->nnz = totnnz; 1584 mumps->irn = irn; 1585 mumps->jcn = jcn; 1586 mumps->val = mumps->val_alloc = val; 1587 1588 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 1589 jcn += mumps->recvcount[0]; 1590 val += mumps->recvcount[0]; 1591 1592 /* Remote communication */ 1593 for (i=1; i<osize; i++) { 1594 count = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX); 1595 remain = mumps->recvcount[i] - count; 1596 while (count>0) { 1597 ierr = MPI_Irecv(irn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr); 1598 ierr = MPI_Irecv(jcn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr); 1599 ierr = MPI_Irecv(val,count,MPIU_SCALAR, i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr); 1600 irn += count; 1601 jcn += count; 1602 val += count; 1603 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1604 remain -= count; 1605 } 1606 } 1607 } else { 1608 irn = mumps->irn; 1609 jcn = mumps->jcn; 1610 val = mumps->val; 1611 count = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX); 1612 remain = mumps->nnz - count; 1613 while (count>0) { 1614 ierr = MPI_Isend(irn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr); 1615 ierr = MPI_Isend(jcn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr); 1616 ierr = MPI_Isend(val,count,MPIU_SCALAR, 0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr); 1617 irn += count; 1618 jcn += count; 1619 val += count; 1620 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1621 remain -= count; 1622 } 1623 } 1624 } else { 1625 nreqs = 0; 1626 if (mumps->is_omp_master) { 1627 val = mumps->val + mumps->recvcount[0]; 1628 for (i=1; i<osize; i++) { /* Remote communication only since self data is already in place */ 1629 count = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX); 1630 remain = mumps->recvcount[i] - count; 1631 while (count>0) { 1632 ierr = MPI_Irecv(val,count,MPIU_SCALAR,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr); 1633 val += count; 1634 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1635 remain -= count; 1636 } 1637 } 1638 } else { 1639 val = mumps->val; 1640 count = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX); 1641 remain = mumps->nnz - count; 1642 while (count>0) { 1643 ierr = MPI_Isend(val,count,MPIU_SCALAR,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr); 1644 val += count; 1645 count = PetscMin(remain,PETSC_MPI_INT_MAX); 1646 remain -= count; 1647 } 1648 } 1649 } 1650 ierr = MPI_Waitall(nreqs,mumps->reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 1651 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 1652 } 1653 PetscFunctionReturn(0); 1654 } 1655 1656 PetscErrorCode 
MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info) 1657 { 1658 Mat_MUMPS *mumps =(Mat_MUMPS*)(F)->data; 1659 PetscErrorCode ierr; 1660 PetscBool isMPIAIJ; 1661 1662 PetscFunctionBegin; 1663 if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) { 1664 if (mumps->id.INFOG(1) == -6) { 1665 ierr = PetscInfo2(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1666 } 1667 ierr = PetscInfo2(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1668 PetscFunctionReturn(0); 1669 } 1670 1671 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps);CHKERRQ(ierr); 1672 ierr = MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);CHKERRQ(ierr); 1673 1674 /* numerical factorization phase */ 1675 /*-------------------------------*/ 1676 mumps->id.job = JOB_FACTNUMERIC; 1677 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 1678 if (!mumps->myid) { 1679 mumps->id.a = (MumpsScalar*)mumps->val; 1680 } 1681 } else { 1682 mumps->id.a_loc = (MumpsScalar*)mumps->val; 1683 } 1684 PetscMUMPS_c(mumps); 1685 if (mumps->id.INFOG(1) < 0) { 1686 if (A->erroriffailure) { 1687 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 1688 } else { 1689 if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */ 1690 ierr = PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1691 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1692 } else if (mumps->id.INFOG(1) == -13) { 1693 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1694 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1695 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 1696 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray \n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1697 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1698 } else { 1699 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1700 F->factorerrortype = MAT_FACTOR_OTHER; 1701 } 1702 } 1703 } 1704 if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d\n",mumps->id.INFOG(16)); 1705 1706 F->assembled = PETSC_TRUE; 1707 mumps->matstruc = SAME_NONZERO_PATTERN; 1708 if (F->schur) { /* reset Schur status to unfactored */ 1709 #if defined(PETSC_HAVE_CUDA) 1710 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 1711 #endif 1712 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 1713 mumps->id.ICNTL(19) = 2; 1714 ierr = MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);CHKERRQ(ierr); 1715 } 1716 ierr = MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);CHKERRQ(ierr); 1717 } 1718 1719 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 1720 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 1721 1722 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 1723 if (mumps->petsc_size > 1) { 1724 
PetscInt lsol_loc; 1725 PetscScalar *sol_loc; 1726 1727 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr); 1728 1729 /* distributed solution; Create x_seq=sol_loc for repeated use */ 1730 if (mumps->x_seq) { 1731 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr); 1732 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr); 1733 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr); 1734 } 1735 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 1736 ierr = PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);CHKERRQ(ierr); 1737 mumps->id.lsol_loc = lsol_loc; 1738 mumps->id.sol_loc = (MumpsScalar*)sol_loc; 1739 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr); 1740 } 1741 ierr = PetscLogFlops(mumps->id.RINFO(2));CHKERRQ(ierr); 1742 PetscFunctionReturn(0); 1743 } 1744 1745 /* Sets MUMPS options from the options database */ 1746 PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A) 1747 { 1748 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1749 PetscErrorCode ierr; 1750 PetscMUMPSInt icntl=0; 1751 PetscInt info[80],i,ninfo=80; 1752 PetscBool flg=PETSC_FALSE; 1753 1754 PetscFunctionBegin; 1755 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 1756 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr); 1757 if (flg) mumps->id.ICNTL(1) = icntl; 1758 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr); 1759 if (flg) mumps->id.ICNTL(2) = icntl; 1760 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr); 1761 if (flg) mumps->id.ICNTL(3) = icntl; 1762 1763 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr); 1764 if (flg) mumps->id.ICNTL(4) = icntl; 1765 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 1766 1767 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr); 1768 if (flg) mumps->id.ICNTL(6) = icntl; 1769 1770 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 
1=Petsc (sequential only), 3=Scotch, 4=PORD, 5=Metis, 7=auto(default)","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr); 1771 if (flg) { 1772 if (icntl== 1 && mumps->petsc_size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not supported by the PETSc/MUMPS interface for parallel matrices\n"); 1773 else mumps->id.ICNTL(7) = icntl; 1774 } 1775 1776 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr); 1777 /* ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL);CHKERRQ(ierr); handled by MatSolveTranspose_MUMPS() */ 1778 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr); 1779 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr); 1780 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr); 1781 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr); 1782 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr); 1783 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr); 1784 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 1785 ierr = MatDestroy(&F->schur);CHKERRQ(ierr); 1786 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr); 1787 } 1788 1789 /* MPICH Fortran MPI_IN_PLACE binding has a bug that prevents the use of 'mpi4py + mpich + mumps', e.g., by Firedrake. 1790 So we turn off distributed RHS for MPICH. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 1791 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 1792 */ 1793 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0) && defined(PETSC_HAVE_OMPI_MAJOR_VERSION) 1794 mumps->ICNTL20 = 10; /* Distributed dense RHS*/ 1795 #else 1796 mumps->ICNTL20 = 0; /* Centralized dense RHS*/ 1797 #endif 1798 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_20","ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides","None",mumps->ICNTL20,&mumps->ICNTL20,&flg);CHKERRQ(ierr); 1799 if (flg && mumps->ICNTL20 != 10 && mumps->ICNTL20 != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. 
Allowed values are 0, 10\n",(int)mumps->ICNTL20); 1800 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0) 1801 if (flg && mumps->ICNTL20 == 10) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=10 is not supported before MUMPS-5.3.0\n"); 1802 #endif 1803 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL);CHKERRQ(ierr); we only use distributed solution vector */ 1804 1805 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr); 1806 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr); 1807 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr); 1808 if (mumps->id.ICNTL(24)) { 1809 mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ 1810 } 1811 1812 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr); 1813 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr); 1814 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_27","ICNTL(27): controls the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr); 1815 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr); 1816 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr); 1817 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */ 1818 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr); 1819 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr); -- not supported by PETSc API */ 1820 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr); 1821 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr); 1822 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);CHKERRQ(ierr); 1823 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with 
BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);CHKERRQ(ierr); 1824 1825 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr); 1826 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr); 1827 ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr); 1828 ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr); 1829 ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr); 1830 ierr = PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);CHKERRQ(ierr); 1831 1832 ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL);CHKERRQ(ierr); 1833 1834 ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr); 1835 if (ninfo) { 1836 if (ninfo > 80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %d must <= 80\n",ninfo); 1837 ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr); 1838 mumps->ninfo = ninfo; 1839 for (i=0; i<ninfo; i++) { 1840 if (info[i] < 0 || info[i]>80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %d must between 1 and 80\n",ninfo); 1841 else mumps->info[i] = info[i]; 1842 } 1843 } 1844 1845 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1846 PetscFunctionReturn(0); 1847 } 1848 1849 PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps) 1850 { 1851 PetscErrorCode ierr; 1852 PetscInt nthreads=0; 1853 MPI_Comm newcomm=MPI_COMM_NULL; 1854 1855 PetscFunctionBegin; 1856 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 1857 ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRQ(ierr); 1858 ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRQ(ierr); /* so that code like "if (!myid)" still works even if mumps_comm is different */ 1859 1860 ierr = PetscOptionsHasName(NULL,NULL,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr); 1861 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 1862 ierr = PetscOptionsGetInt(NULL,NULL,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr); 1863 if (mumps->use_petsc_omp_support) { 1864 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1865 ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr); 1866 ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr); 1867 #else 1868 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -mat_mumps_use_omp_threads option. 
Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual\n"); 1869 #endif 1870 } else { 1871 mumps->omp_comm = PETSC_COMM_SELF; 1872 mumps->mumps_comm = mumps->petsc_comm; 1873 mumps->is_omp_master = PETSC_TRUE; 1874 } 1875 ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRQ(ierr); 1876 mumps->reqs = NULL; 1877 mumps->tag = 0; 1878 1879 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */ 1880 if (mumps->mumps_comm != MPI_COMM_NULL) { 1881 ierr = MPI_Comm_dup(mumps->mumps_comm,&newcomm);CHKERRQ(ierr); 1882 mumps->mumps_comm = newcomm; 1883 } 1884 1885 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 1886 mumps->id.job = JOB_INIT; 1887 mumps->id.par = 1; /* host participates factorizaton and solve */ 1888 mumps->id.sym = mumps->sym; 1889 1890 PetscMUMPS_c(mumps); 1891 if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in PetscInitializeMUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1892 1893 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 1894 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 1895 */ 1896 ierr = MPI_Bcast(mumps->id.icntl,40,MPI_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 Manual Section 9 */ 1897 ierr = MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); 1898 1899 mumps->scat_rhs = NULL; 1900 mumps->scat_sol = NULL; 1901 1902 /* set PETSc-MUMPS default options - override MUMPS default */ 1903 mumps->id.ICNTL(3) = 0; 1904 mumps->id.ICNTL(4) = 0; 1905 if (mumps->petsc_size == 1) { 1906 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 1907 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 1908 } else { 1909 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 1910 mumps->id.ICNTL(21) = 1; /* distributed solution */ 1911 } 1912 1913 /* schur */ 1914 mumps->id.size_schur = 0; 1915 mumps->id.listvar_schur = NULL; 1916 mumps->id.schur = NULL; 1917 mumps->sizeredrhs = 0; 1918 mumps->schur_sol = NULL; 1919 mumps->schur_sizesol = 0; 1920 PetscFunctionReturn(0); 1921 } 1922 1923 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps) 1924 { 1925 PetscErrorCode ierr; 1926 1927 PetscFunctionBegin; 1928 if (mumps->id.INFOG(1) < 0) { 1929 if (A->erroriffailure) { 1930 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",mumps->id.INFOG(1)); 1931 } else { 1932 if (mumps->id.INFOG(1) == -6) { 1933 ierr = PetscInfo2(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1934 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 1935 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 1936 ierr = PetscInfo2(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1937 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 1938 } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) { 1939 ierr = PetscInfo(F,"Empty matrix\n");CHKERRQ(ierr); 1940 } else { 1941 ierr = PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr); 1942 F->factorerrortype = MAT_FACTOR_OTHER; 1943 } 1944 } 1945 } 1946 
PetscFunctionReturn(0); 1947 } 1948 1949 /* Note Petsc r(=c) permutation is used when mumps->id.ICNTL(7)==1 with centralized assembled matrix input; otherwise r and c are ignored */ 1950 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1951 { 1952 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 1953 PetscErrorCode ierr; 1954 Vec b; 1955 const PetscInt M = A->rmap->N; 1956 1957 PetscFunctionBegin; 1958 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 1959 1960 /* Set MUMPS options from the options database */ 1961 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 1962 1963 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr); 1964 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 1965 1966 /* analysis phase */ 1967 /*----------------*/ 1968 mumps->id.job = JOB_FACTSYMBOLIC; 1969 mumps->id.n = M; 1970 switch (mumps->id.ICNTL(18)) { 1971 case 0: /* centralized assembled matrix input */ 1972 if (!mumps->myid) { 1973 mumps->id.nnz = mumps->nnz; 1974 mumps->id.irn = mumps->irn; 1975 mumps->id.jcn = mumps->jcn; 1976 if (mumps->id.ICNTL(6)>1) mumps->id.a = (MumpsScalar*)mumps->val; 1977 if (mumps->id.ICNTL(7) == 1) { /* use user-provide matrix ordering - assuming r = c ordering */ 1978 if (!mumps->myid) { 1979 const PetscInt *idx; 1980 PetscInt i; 1981 1982 ierr = PetscMalloc1(M,&mumps->id.perm_in);CHKERRQ(ierr); 1983 ierr = ISGetIndices(r,&idx);CHKERRQ(ierr); 1984 for (i=0; i<M; i++) {ierr = PetscMUMPSIntCast(idx[i]+1,&(mumps->id.perm_in[i]));CHKERRQ(ierr);} /* perm_in[]: start from 1, not 0! */ 1985 ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr); 1986 } 1987 } 1988 } 1989 break; 1990 case 3: /* distributed assembled matrix input (size>1) */ 1991 mumps->id.nnz_loc = mumps->nnz; 1992 mumps->id.irn_loc = mumps->irn; 1993 mumps->id.jcn_loc = mumps->jcn; 1994 if (mumps->id.ICNTL(6)>1) mumps->id.a_loc = (MumpsScalar*)mumps->val; 1995 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 1996 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 1997 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 1998 ierr = VecDestroy(&b);CHKERRQ(ierr); 1999 } 2000 break; 2001 } 2002 PetscMUMPS_c(mumps); 2003 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 2004 2005 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2006 F->ops->solve = MatSolve_MUMPS; 2007 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2008 F->ops->matsolve = MatMatSolve_MUMPS; 2009 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2010 PetscFunctionReturn(0); 2011 } 2012 2013 /* Note the Petsc r and c permutations are ignored */ 2014 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 2015 { 2016 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 2017 PetscErrorCode ierr; 2018 Vec b; 2019 const PetscInt M = A->rmap->N; 2020 2021 PetscFunctionBegin; 2022 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 2023 2024 /* Set MUMPS options from the options database */ 2025 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 2026 2027 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr); 2028 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 2029 2030 /* analysis phase */ 2031 /*----------------*/ 2032 mumps->id.job = JOB_FACTSYMBOLIC; 2033 mumps->id.n = M; 2034 switch (mumps->id.ICNTL(18)) { 2035 case 0: /* centralized assembled matrix input */ 2036 if (!mumps->myid) { 2037 mumps->id.nnz = mumps->nnz; 2038 mumps->id.irn = mumps->irn; 2039 mumps->id.jcn = mumps->jcn; 2040 if (mumps->id.ICNTL(6)>1) { 2041 mumps->id.a = (MumpsScalar*)mumps->val; 2042 } 2043 } 2044 break; 2045 case 3: /* distributed assembled matrix input (size>1) */ 2046 mumps->id.nnz_loc = mumps->nnz; 2047 mumps->id.irn_loc = mumps->irn; 2048 mumps->id.jcn_loc = mumps->jcn; 2049 if (mumps->id.ICNTL(6)>1) { 2050 mumps->id.a_loc = (MumpsScalar*)mumps->val; 2051 } 2052 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2053 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 2054 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 2055 ierr = VecDestroy(&b);CHKERRQ(ierr); 2056 } 2057 break; 2058 } 2059 PetscMUMPS_c(mumps); 2060 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 2061 2062 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2063 F->ops->solve = MatSolve_MUMPS; 2064 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2065 PetscFunctionReturn(0); 2066 } 2067 2068 /* Note the Petsc r permutation and factor info are ignored */ 2069 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info) 2070 { 2071 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data; 2072 PetscErrorCode ierr; 2073 Vec b; 2074 const PetscInt M = A->rmap->N; 2075 2076 PetscFunctionBegin; 2077 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 2078 2079 /* Set MUMPS options from the options database */ 2080 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr); 2081 2082 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr); 2083 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr); 2084 2085 /* analysis phase */ 2086 /*----------------*/ 2087 mumps->id.job = JOB_FACTSYMBOLIC; 2088 mumps->id.n = M; 2089 switch (mumps->id.ICNTL(18)) { 2090 case 0: /* centralized assembled matrix input */ 2091 if (!mumps->myid) { 2092 mumps->id.nnz = mumps->nnz; 2093 mumps->id.irn = mumps->irn; 2094 mumps->id.jcn = mumps->jcn; 2095 if (mumps->id.ICNTL(6)>1) { 2096 mumps->id.a = (MumpsScalar*)mumps->val; 2097 } 2098 } 2099 break; 2100 case 3: /* distributed assembled matrix input (size>1) */ 2101 mumps->id.nnz_loc = mumps->nnz; 2102 mumps->id.irn_loc = mumps->irn; 2103 mumps->id.jcn_loc = mumps->jcn; 2104 if (mumps->id.ICNTL(6)>1) { 2105 mumps->id.a_loc = (MumpsScalar*)mumps->val; 2106 } 2107 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2108 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr); 2109 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr); 2110 ierr = VecDestroy(&b);CHKERRQ(ierr); 2111 } 2112 break; 2113 } 2114 PetscMUMPS_c(mumps); 2115 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr); 2116 2117 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 2118 F->ops->solve = MatSolve_MUMPS; 2119 F->ops->solvetranspose = MatSolve_MUMPS; 2120 F->ops->matsolve = MatMatSolve_MUMPS; 2121 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2122 #if defined(PETSC_USE_COMPLEX) 2123 F->ops->getinertia = NULL; 2124 #else 2125 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 2126 #endif 2127 PetscFunctionReturn(0); 2128 } 2129 2130 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) 2131 { 2132 PetscErrorCode ierr; 2133 PetscBool iascii; 2134 PetscViewerFormat format; 2135 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data; 2136 2137 PetscFunctionBegin; 2138 /* check if matrix is mumps type */ 2139 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0); 2140 2141 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 2142 if (iascii) { 2143 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 2144 if (format == PETSC_VIEWER_ASCII_INFO) { 2145 ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr); 2146 ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",mumps->id.sym);CHKERRQ(ierr); 2147 ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",mumps->id.par);CHKERRQ(ierr); 2148 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d \n",mumps->id.ICNTL(1));CHKERRQ(ierr); 2149 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d \n",mumps->id.ICNTL(2));CHKERRQ(ierr); 2150 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d \n",mumps->id.ICNTL(3));CHKERRQ(ierr); 2151 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",mumps->id.ICNTL(4));CHKERRQ(ierr); 2152 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",mumps->id.ICNTL(5));CHKERRQ(ierr); 2153 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",mumps->id.ICNTL(6));CHKERRQ(ierr); 2154 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d \n",mumps->id.ICNTL(7));CHKERRQ(ierr); 2155 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d \n",mumps->id.ICNTL(8));CHKERRQ(ierr); 2156 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",mumps->id.ICNTL(10));CHKERRQ(ierr); 2157 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",mumps->id.ICNTL(11));CHKERRQ(ierr); 2158 if (mumps->id.ICNTL(11)>0) { 2159 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr); 2160 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr); 2161 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6));CHKERRQ(ierr); 2162 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr); 2163 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr); 2164 ierr = PetscViewerASCIIPrintf(viewer," 
RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr); 2165 } 2166 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",mumps->id.ICNTL(12));CHKERRQ(ierr); 2167 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (sequential factorization of the root node): %d \n",mumps->id.ICNTL(13));CHKERRQ(ierr); 2168 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",mumps->id.ICNTL(14));CHKERRQ(ierr); 2169 /* ICNTL(15-17) not used */ 2170 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",mumps->id.ICNTL(18));CHKERRQ(ierr); 2171 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d \n",mumps->id.ICNTL(19));CHKERRQ(ierr); 2172 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(20) (RHS sparse pattern): %d \n",mumps->id.ICNTL(20));CHKERRQ(ierr); 2173 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d \n",mumps->id.ICNTL(21));CHKERRQ(ierr); 2174 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d \n",mumps->id.ICNTL(22));CHKERRQ(ierr); 2175 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory can be allocated locally):%d \n",mumps->id.ICNTL(23));CHKERRQ(ierr); 2176 2177 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d \n",mumps->id.ICNTL(24));CHKERRQ(ierr); 2178 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d \n",mumps->id.ICNTL(25));CHKERRQ(ierr); 2179 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for RHS or solution): %d \n",mumps->id.ICNTL(26));CHKERRQ(ierr); 2180 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(27) (blocking size for multiple RHS): %d \n",mumps->id.ICNTL(27));CHKERRQ(ierr); 2181 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d \n",mumps->id.ICNTL(28));CHKERRQ(ierr); 2182 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d \n",mumps->id.ICNTL(29));CHKERRQ(ierr); 2183 2184 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d \n",mumps->id.ICNTL(30));CHKERRQ(ierr); 2185 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(31) (factors is discarded in the solve phase): %d \n",mumps->id.ICNTL(31));CHKERRQ(ierr); 2186 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d \n",mumps->id.ICNTL(33));CHKERRQ(ierr); 2187 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d \n",mumps->id.ICNTL(35));CHKERRQ(ierr); 2188 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(36) (choice of BLR factorization variant): %d \n",mumps->id.ICNTL(36));CHKERRQ(ierr); 2189 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(38) (estimated compression rate of LU factors): %d \n",mumps->id.ICNTL(38));CHKERRQ(ierr); 2190 2191 ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1));CHKERRQ(ierr); 2192 ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr); 2193 ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3));CHKERRQ(ierr); 2194 ierr = PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4));CHKERRQ(ierr); 2195 ierr = PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5));CHKERRQ(ierr); 2196 ierr = 
PetscViewerASCIIPrintf(viewer," CNTL(7) (dropping parameter for BLR): %g \n",mumps->id.CNTL(7));CHKERRQ(ierr); 2197 2198 /* infomation local to each processor */ 2199 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr); 2200 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 2201 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr); 2202 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2203 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr); 2204 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr); 2205 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2206 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr); 2207 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr); 2208 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2209 2210 ierr = PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr); 2211 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr); 2212 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2213 2214 ierr = PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr); 2215 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr); 2216 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2217 2218 ierr = PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr); 2219 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr); 2220 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2221 2222 if (mumps->ninfo && mumps->ninfo <= 80){ 2223 PetscInt i; 2224 for (i=0; i<mumps->ninfo; i++){ 2225 ierr = PetscViewerASCIIPrintf(viewer, " INFO(%d): \n",mumps->info[i]);CHKERRQ(ierr); 2226 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(mumps->info[i]));CHKERRQ(ierr); 2227 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 2228 } 2229 } 2230 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 2231 2232 if (!mumps->myid) { /* information from the host */ 2233 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr); 2234 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr); 2235 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr); 2236 ierr = PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr); 2237 2238 ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(3));CHKERRQ(ierr); 2239 ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for 
factors on all processors after analysis): %d \n",mumps->id.INFOG(4));CHKERRQ(ierr); 2240 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",mumps->id.INFOG(5));CHKERRQ(ierr); 2241 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",mumps->id.INFOG(6));CHKERRQ(ierr); 2242 ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively use after analysis): %d \n",mumps->id.INFOG(7));CHKERRQ(ierr); 2243 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",mumps->id.INFOG(8));CHKERRQ(ierr); 2244 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",mumps->id.INFOG(9));CHKERRQ(ierr); 2245 ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d \n",mumps->id.INFOG(10));CHKERRQ(ierr); 2246 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d \n",mumps->id.INFOG(11));CHKERRQ(ierr); 2247 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",mumps->id.INFOG(12));CHKERRQ(ierr); 2248 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",mumps->id.INFOG(13));CHKERRQ(ierr); 2249 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",mumps->id.INFOG(14));CHKERRQ(ierr); 2250 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",mumps->id.INFOG(15));CHKERRQ(ierr); 2251 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",mumps->id.INFOG(16));CHKERRQ(ierr); 2252 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",mumps->id.INFOG(17));CHKERRQ(ierr); 2253 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",mumps->id.INFOG(18));CHKERRQ(ierr); 2254 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",mumps->id.INFOG(19));CHKERRQ(ierr); 2255 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",mumps->id.INFOG(20));CHKERRQ(ierr); 2256 ierr = PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",mumps->id.INFOG(21));CHKERRQ(ierr); 2257 ierr = PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",mumps->id.INFOG(22));CHKERRQ(ierr); 2258 ierr = PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",mumps->id.INFOG(23));CHKERRQ(ierr); 2259 ierr = PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",mumps->id.INFOG(24));CHKERRQ(ierr); 2260 ierr = PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d 
\n",mumps->id.INFOG(25));CHKERRQ(ierr); 2261 ierr = PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));CHKERRQ(ierr); 2262 ierr = PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));CHKERRQ(ierr); 2263 ierr = PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));CHKERRQ(ierr); 2264 ierr = PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));CHKERRQ(ierr); 2265 ierr = PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));CHKERRQ(ierr); 2266 ierr = PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));CHKERRQ(ierr); 2267 ierr = PetscViewerASCIIPrintf(viewer," INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));CHKERRQ(ierr); 2268 ierr = PetscViewerASCIIPrintf(viewer," INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(36));CHKERRQ(ierr); 2269 ierr = PetscViewerASCIIPrintf(viewer," INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d \n",mumps->id.INFOG(37));CHKERRQ(ierr); 2270 ierr = PetscViewerASCIIPrintf(viewer," INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(38));CHKERRQ(ierr); 2271 ierr = PetscViewerASCIIPrintf(viewer," INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d \n",mumps->id.INFOG(39));CHKERRQ(ierr); 2272 } 2273 } 2274 } 2275 PetscFunctionReturn(0); 2276 } 2277 2278 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info) 2279 { 2280 Mat_MUMPS *mumps =(Mat_MUMPS*)A->data; 2281 2282 PetscFunctionBegin; 2283 info->block_size = 1.0; 2284 info->nz_allocated = mumps->id.INFOG(20); 2285 info->nz_used = mumps->id.INFOG(20); 2286 info->nz_unneeded = 0.0; 2287 info->assemblies = 0.0; 2288 info->mallocs = 0.0; 2289 info->memory = 0.0; 2290 info->fill_ratio_given = 0; 2291 info->fill_ratio_needed = 0; 2292 info->factor_mallocs = 0; 2293 PetscFunctionReturn(0); 2294 } 2295 2296 /* -------------------------------------------------------------------------------------------*/ 2297 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 2298 { 2299 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2300 const PetscScalar *arr; 2301 const PetscInt *idxs; 2302 PetscInt size,i; 2303 PetscErrorCode ierr; 2304 2305 PetscFunctionBegin; 2306 ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr); 2307 if (mumps->petsc_size > 1) { 2308 PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */ 2309 2310 ls = mumps->myid ? (size ? 
PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 2311 ierr = MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);CHKERRQ(ierr); 2312 if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc\n"); 2313 } 2314 2315 /* Schur complement matrix */ 2316 ierr = MatDestroy(&F->schur);CHKERRQ(ierr); 2317 ierr = MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur);CHKERRQ(ierr); 2318 ierr = MatDenseGetArrayRead(F->schur,&arr);CHKERRQ(ierr); 2319 mumps->id.schur = (MumpsScalar*)arr; 2320 mumps->id.size_schur = size; 2321 mumps->id.schur_lld = size; 2322 ierr = MatDenseRestoreArrayRead(F->schur,&arr);CHKERRQ(ierr); 2323 if (mumps->sym == 1) { 2324 ierr = MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 2325 } 2326 2327 /* MUMPS expects Fortran style indices */ 2328 ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr); 2329 ierr = PetscMalloc1(size,&mumps->id.listvar_schur);CHKERRQ(ierr); 2330 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 2331 for (i=0; i<size; i++) {ierr = PetscMUMPSIntCast(idxs[i]+1,&(mumps->id.listvar_schur[i]));CHKERRQ(ierr);} 2332 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 2333 if (mumps->petsc_size > 1) { 2334 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2335 } else { 2336 if (F->factortype == MAT_FACTOR_LU) { 2337 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2338 } else { 2339 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2340 } 2341 } 2342 /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 2343 mumps->id.ICNTL(26) = -1; 2344 PetscFunctionReturn(0); 2345 } 2346 2347 /* -------------------------------------------------------------------------------------------*/ 2348 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S) 2349 { 2350 Mat St; 2351 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2352 PetscScalar *array; 2353 #if defined(PETSC_USE_COMPLEX) 2354 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); 2355 #endif 2356 PetscErrorCode ierr; 2357 2358 PetscFunctionBegin; 2359 if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! 
You should call MatFactorSetSchurIS to enable it"); 2360 ierr = MatCreate(PETSC_COMM_SELF,&St);CHKERRQ(ierr); 2361 ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);CHKERRQ(ierr); 2362 ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr); 2363 ierr = MatSetUp(St);CHKERRQ(ierr); 2364 ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr); 2365 if (!mumps->sym) { /* MUMPS always return a full matrix */ 2366 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2367 PetscInt i,j,N=mumps->id.size_schur; 2368 for (i=0;i<N;i++) { 2369 for (j=0;j<N;j++) { 2370 #if !defined(PETSC_USE_COMPLEX) 2371 PetscScalar val = mumps->id.schur[i*N+j]; 2372 #else 2373 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2374 #endif 2375 array[j*N+i] = val; 2376 } 2377 } 2378 } else { /* stored by columns */ 2379 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 2380 } 2381 } else { /* either full or lower-triangular (not packed) */ 2382 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 2383 PetscInt i,j,N=mumps->id.size_schur; 2384 for (i=0;i<N;i++) { 2385 for (j=i;j<N;j++) { 2386 #if !defined(PETSC_USE_COMPLEX) 2387 PetscScalar val = mumps->id.schur[i*N+j]; 2388 #else 2389 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2390 #endif 2391 array[i*N+j] = val; 2392 array[j*N+i] = val; 2393 } 2394 } 2395 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 2396 ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr); 2397 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 2398 PetscInt i,j,N=mumps->id.size_schur; 2399 for (i=0;i<N;i++) { 2400 for (j=0;j<i+1;j++) { 2401 #if !defined(PETSC_USE_COMPLEX) 2402 PetscScalar val = mumps->id.schur[i*N+j]; 2403 #else 2404 PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i; 2405 #endif 2406 array[i*N+j] = val; 2407 array[j*N+i] = val; 2408 } 2409 } 2410 } 2411 } 2412 ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr); 2413 *S = St; 2414 PetscFunctionReturn(0); 2415 } 2416 2417 /* -------------------------------------------------------------------------------------------*/ 2418 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival) 2419 { 2420 PetscErrorCode ierr; 2421 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2422 2423 PetscFunctionBegin; 2424 ierr = PetscMUMPSIntCast(ival,&mumps->id.ICNTL(icntl));CHKERRQ(ierr); 2425 PetscFunctionReturn(0); 2426 } 2427 2428 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival) 2429 { 2430 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2431 2432 PetscFunctionBegin; 2433 *ival = mumps->id.ICNTL(icntl); 2434 PetscFunctionReturn(0); 2435 } 2436 2437 /*@ 2438 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() 2439 2440 Logically Collective on Mat 2441 2442 Input Parameters: 2443 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2444 . icntl - index of MUMPS parameter array ICNTL() 2445 - ival - value of MUMPS ICNTL(icntl) 2446 2447 Options Database: 2448 . -mat_mumps_icntl_<icntl> <ival> 2449 2450 Level: beginner 2451 2452 References: 2453 . 
MUMPS Users' Guide 2454 2455 .seealso: MatGetFactor(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2456 @*/ 2457 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival) 2458 { 2459 PetscErrorCode ierr; 2460 2461 PetscFunctionBegin; 2462 PetscValidType(F,1); 2463 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2464 PetscValidLogicalCollectiveInt(F,icntl,2); 2465 PetscValidLogicalCollectiveInt(F,ival,3); 2466 ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr); 2467 PetscFunctionReturn(0); 2468 } 2469 2470 /*@ 2471 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() 2472 2473 Logically Collective on Mat 2474 2475 Input Parameters: 2476 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2477 - icntl - index of MUMPS parameter array ICNTL() 2478 2479 Output Parameter: 2480 . ival - value of MUMPS ICNTL(icntl) 2481 2482 Level: beginner 2483 2484 References: 2485 . MUMPS Users' Guide 2486 2487 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2488 @*/ 2489 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival) 2490 { 2491 PetscErrorCode ierr; 2492 2493 PetscFunctionBegin; 2494 PetscValidType(F,1); 2495 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2496 PetscValidLogicalCollectiveInt(F,icntl,2); 2497 PetscValidIntPointer(ival,3); 2498 ierr = PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2499 PetscFunctionReturn(0); 2500 } 2501 2502 /* -------------------------------------------------------------------------------------------*/ 2503 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val) 2504 { 2505 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2506 2507 PetscFunctionBegin; 2508 mumps->id.CNTL(icntl) = val; 2509 PetscFunctionReturn(0); 2510 } 2511 2512 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val) 2513 { 2514 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2515 2516 PetscFunctionBegin; 2517 *val = mumps->id.CNTL(icntl); 2518 PetscFunctionReturn(0); 2519 } 2520 2521 /*@ 2522 MatMumpsSetCntl - Set MUMPS parameter CNTL() 2523 2524 Logically Collective on Mat 2525 2526 Input Parameters: 2527 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2528 . icntl - index of MUMPS parameter array CNTL() 2529 - val - value of MUMPS CNTL(icntl) 2530 2531 Options Database: 2532 . -mat_mumps_cntl_<icntl> <val> 2533 2534 Level: beginner 2535 2536 References: 2537 . 
MUMPS Users' Guide 2538 2539 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2540 @*/ 2541 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val) 2542 { 2543 PetscErrorCode ierr; 2544 2545 PetscFunctionBegin; 2546 PetscValidType(F,1); 2547 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2548 PetscValidLogicalCollectiveInt(F,icntl,2); 2549 PetscValidLogicalCollectiveReal(F,val,3); 2550 ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr); 2551 PetscFunctionReturn(0); 2552 } 2553 2554 /*@ 2555 MatMumpsGetCntl - Get MUMPS parameter CNTL() 2556 2557 Logically Collective on Mat 2558 2559 Input Parameters: 2560 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2561 - icntl - index of MUMPS parameter array CNTL() 2562 2563 Output Parameter: 2564 . val - value of MUMPS CNTL(icntl) 2565 2566 Level: beginner 2567 2568 References: 2569 . MUMPS Users' Guide 2570 2571 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2572 @*/ 2573 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val) 2574 { 2575 PetscErrorCode ierr; 2576 2577 PetscFunctionBegin; 2578 PetscValidType(F,1); 2579 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2580 PetscValidLogicalCollectiveInt(F,icntl,2); 2581 PetscValidRealPointer(val,3); 2582 ierr = PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr); 2583 PetscFunctionReturn(0); 2584 } 2585 2586 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info) 2587 { 2588 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2589 2590 PetscFunctionBegin; 2591 *info = mumps->id.INFO(icntl); 2592 PetscFunctionReturn(0); 2593 } 2594 2595 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog) 2596 { 2597 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2598 2599 PetscFunctionBegin; 2600 *infog = mumps->id.INFOG(icntl); 2601 PetscFunctionReturn(0); 2602 } 2603 2604 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo) 2605 { 2606 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2607 2608 PetscFunctionBegin; 2609 *rinfo = mumps->id.RINFO(icntl); 2610 PetscFunctionReturn(0); 2611 } 2612 2613 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog) 2614 { 2615 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2616 2617 PetscFunctionBegin; 2618 *rinfog = mumps->id.RINFOG(icntl); 2619 PetscFunctionReturn(0); 2620 } 2621 2622 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS) 2623 { 2624 PetscErrorCode ierr; 2625 Mat Bt = NULL,Btseq = NULL; 2626 PetscBool flg; 2627 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data; 2628 PetscScalar *aa; 2629 PetscInt spnr,*ia,*ja,M,nrhs; 2630 2631 PetscFunctionBegin; 2632 PetscValidIntPointer(spRHS,2); 2633 ierr = PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);CHKERRQ(ierr); 2634 if (flg) { 2635 ierr = MatTransposeGetMat(spRHS,&Bt);CHKERRQ(ierr); 2636 } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be type MATTRANSPOSEMAT matrix"); 2637 2638 ierr = MatMumpsSetIcntl(F,30,1);CHKERRQ(ierr); 2639 2640 if (mumps->petsc_size > 1) { 2641 Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data; 2642 Btseq = 
b->A; 2643 } else { 2644 Btseq = Bt; 2645 } 2646 2647 ierr = MatGetSize(spRHS,&M,&nrhs);CHKERRQ(ierr); 2648 mumps->id.nrhs = nrhs; 2649 mumps->id.lrhs = M; 2650 mumps->id.rhs = NULL; 2651 2652 if (!mumps->myid) { 2653 ierr = MatSeqAIJGetArray(Btseq,&aa);CHKERRQ(ierr); 2654 ierr = MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2655 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2656 ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr); 2657 mumps->id.rhs_sparse = (MumpsScalar*)aa; 2658 } else { 2659 mumps->id.irhs_ptr = NULL; 2660 mumps->id.irhs_sparse = NULL; 2661 mumps->id.nz_rhs = 0; 2662 mumps->id.rhs_sparse = NULL; 2663 } 2664 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 2665 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 2666 2667 /* solve phase */ 2668 /*-------------*/ 2669 mumps->id.job = JOB_SOLVE; 2670 PetscMUMPS_c(mumps); 2671 if (mumps->id.INFOG(1) < 0) 2672 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)); 2673 2674 if (!mumps->myid) { 2675 ierr = MatSeqAIJRestoreArray(Btseq,&aa);CHKERRQ(ierr); 2676 ierr = MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr); 2677 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure"); 2678 } 2679 PetscFunctionReturn(0); 2680 } 2681 2682 /*@ 2683 MatMumpsGetInverse - Get user-specified set of entries in inverse of A 2684 2685 Logically Collective on Mat 2686 2687 Input Parameters: 2688 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2689 - spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0] 2690 2691 Output Parameter: 2692 . spRHS - requested entries of inverse of A 2693 2694 Level: beginner 2695 2696 References: 2697 . MUMPS Users' Guide 2698 2699 .seealso: MatGetFactor(), MatCreateTranspose() 2700 @*/ 2701 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS) 2702 { 2703 PetscErrorCode ierr; 2704 2705 PetscFunctionBegin; 2706 PetscValidType(F,1); 2707 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2708 ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr); 2709 PetscFunctionReturn(0); 2710 } 2711 2712 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST) 2713 { 2714 PetscErrorCode ierr; 2715 Mat spRHS; 2716 2717 PetscFunctionBegin; 2718 ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr); 2719 ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr); 2720 ierr = MatDestroy(&spRHS);CHKERRQ(ierr); 2721 PetscFunctionReturn(0); 2722 } 2723 2724 /*@ 2725 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T 2726 2727 Logically Collective on Mat 2728 2729 Input Parameters: 2730 + F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface 2731 - spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0] 2732 2733 Output Parameter: 2734 . spRHST - requested entries of inverse of A^T 2735 2736 Level: beginner 2737 2738 References: 2739 . 
MUMPS Users' Guide 2740 2741 .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse() 2742 @*/ 2743 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST) 2744 { 2745 PetscErrorCode ierr; 2746 PetscBool flg; 2747 2748 PetscFunctionBegin; 2749 PetscValidType(F,1); 2750 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2751 ierr = PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr); 2752 if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix"); 2753 2754 ierr = PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));CHKERRQ(ierr); 2755 PetscFunctionReturn(0); 2756 } 2757 2758 /*@ 2759 MatMumpsGetInfo - Get MUMPS parameter INFO() 2760 2761 Logically Collective on Mat 2762 2763 Input Parameters: 2764 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2765 - icntl - index of MUMPS parameter array INFO() 2766 2767 Output Parameter: 2768 . ival - value of MUMPS INFO(icntl) 2769 2770 Level: beginner 2771 2772 References: 2773 . MUMPS Users' Guide 2774 2775 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2776 @*/ 2777 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival) 2778 { 2779 PetscErrorCode ierr; 2780 2781 PetscFunctionBegin; 2782 PetscValidType(F,1); 2783 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2784 PetscValidIntPointer(ival,3); 2785 ierr = PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2786 PetscFunctionReturn(0); 2787 } 2788 2789 /*@ 2790 MatMumpsGetInfog - Get MUMPS parameter INFOG() 2791 2792 Logically Collective on Mat 2793 2794 Input Parameters: 2795 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2796 - icntl - index of MUMPS parameter array INFOG() 2797 2798 Output Parameter: 2799 . ival - value of MUMPS INFOG(icntl) 2800 2801 Level: beginner 2802 2803 References: 2804 . MUMPS Users' Guide 2805 2806 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetRinfo(), MatMumpsGetRinfog() 2807 @*/ 2808 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival) 2809 { 2810 PetscErrorCode ierr; 2811 2812 PetscFunctionBegin; 2813 PetscValidType(F,1); 2814 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix"); 2815 PetscValidIntPointer(ival,3); 2816 ierr = PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr); 2817 PetscFunctionReturn(0); 2818 } 2819 2820 /*@ 2821 MatMumpsGetRinfo - Get MUMPS parameter RINFO() 2822 2823 Logically Collective on Mat 2824 2825 Input Parameters: 2826 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface 2827 - icntl - index of MUMPS parameter array RINFO() 2828 2829 Output Parameter: 2830 . val - value of MUMPS RINFO(icntl) 2831 2832 Level: beginner 2833 2834 References: 2835 . 
MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidRealPointer(val,3);
  ierr = PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatMumpsGetRinfog - Get MUMPS parameter RINFOG()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  icntl - index of MUMPS parameter array RINFOG()

   Output Parameter:
.  val - value of MUMPS RINFOG(icntl)

   Level: beginner

   References:
.  MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo()
@*/
PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidRealPointer(val,3);
  ierr = PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
  MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for
  distributed and sequential matrices via the external package MUMPS.

  Works with MATAIJ and MATSBAIJ matrices

  Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS

  Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below.

  Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver

  Options Database Keys:
+ -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
. -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warnings
. -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host
. -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4)
. -mat_mumps_icntl_6 - ICNTL(6): permutes the matrix to a zero-free diagonal and/or scales it (0 to 7)
. -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7), 1=PETSc (sequential only), 3=Scotch, 4=PORD, 5=Metis
. -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77)
. -mat_mumps_icntl_10 - ICNTL(10): maximum number of iterative refinement steps
. -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view)
. -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
. -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
. -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space
. -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement
. -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
. -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
. -mat_mumps_icntl_23 - ICNTL(23): maximum size of the working memory (MB) that MUMPS can allocate per processor
. -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
. -mat_mumps_icntl_25 - ICNTL(25): computes a solution of a deficient matrix and a null space basis
. -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix is present
. -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering
. -mat_mumps_icntl_29 - ICNTL(29): parallel ordering, 1 = ptscotch, 2 = parmetis
. -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
. -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
. -mat_mumps_icntl_33 - ICNTL(33): compute determinant
. -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
. -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
. -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
. -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold
. -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement
. -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
. -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
. -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
. -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
- -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) were called before calling MUMPS.
  The default is typically the number of cores per CPU package (socket), as reported by hwloc and suggested by the MUMPS manual.

  If run sequentially, the PETSc-provided ordering can be used with the option -mat_mumps_icntl_7 1

  Level: beginner

  Notes:
  MUMPS Cholesky does not handle (complex) Hermitian matrices (see http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf), so using it on a Hermitian matrix will produce an error.

  When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED reason, one can find the MUMPS information about the failure by calling
$          KSPGetPC(ksp,&pc);
$          PCFactorGetMatrix(pc,&mat);
$          MatMumpsGetInfo(mat,....);
$          MatMumpsGetInfog(mat,....); etc.
  Alternatively, run with -ksp_error_if_not_converged; the program then stops and the MUMPS information is printed in the error message.

  There are two ways to run MUMPS/PETSc with OpenMP:

$   Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
$   threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".

$   -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
$   if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"

  To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
  (i.e., the PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc
  (or --with-hwloc), and have an MPI implementation that supports MPI-3.0 process shared memory (which is usually available). Since MUMPS calls BLAS
  libraries, to really gain performance you should use a multithreaded BLAS library such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
  (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).

  If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain the MPI
  processes on each compute node. Listing the processes in ascending rank order, we split the processes on a node into consecutive groups of
  size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and the others in the omp_comm
  are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slave ranks are not. We free the CPUs assigned to slave ranks (which
  might have been set by CPU binding policies in job scripts) and make them available to the master rank so that the OMP threads spawned by MUMPS
  can run on those CPUs. On a multi-socket compute node, MPI rank mapping becomes an issue. Continuing the example above, suppose each compute node
  has two sockets. If MPI ranks are interleaved across the two sockets (even ranks on socket 0, odd ranks on socket 1) and bound to cores, then with
  -mat_mumps_use_omp_threads 16 a master rank (and the threads it spawns) will use half of the cores on socket 0 and half on socket 1, which hurts
  data locality. If instead MPI ranks are mapped consecutively onto the two sockets, the problem does not arise. Therefore, when you use
  -mat_mumps_use_omp_threads, keep an eye on your MPI rank mapping and CPU binding. For example, with the Slurm job scheduler, one can use
  srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and examine the mapping result.

  PETSc does not control thread binding in MUMPS. So to get the best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts,
  for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts, as PETSc
  calls omp_set_num_threads(m) internally before calling MUMPS.

  References:
+ 1. - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
- 2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
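
  Usage sketch:
  The sketch below (for illustration only, with error checking omitted) shows how MUMPS can also be driven directly through the Mat
  factorization interface rather than through KSP/PC. The names A, b and x stand for a previously assembled Mat and Vecs supplied by the
  caller, and the particular ICNTL/CNTL values are arbitrary examples:
$     Mat            F;
$     IS             rowperm,colperm;
$     MatFactorInfo  info;
$     PetscInt       ival;
$     MatFactorInfoInitialize(&info);
$     MatGetOrdering(A,MATORDERINGNATURAL,&rowperm,&colperm);
$     MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);
$     MatMumpsSetIcntl(F,7,5);        (for example, ICNTL(7)=5: use Metis ordering inside MUMPS)
$     MatMumpsSetCntl(F,1,0.01);      (for example, CNTL(1): relative pivoting threshold)
$     MatLUFactorSymbolic(F,A,rowperm,colperm,&info);
$     MatLUFactorNumeric(F,A,&info);
$     MatMumpsGetInfog(F,1,&ival);    (INFOG(1) < 0 indicates an error detected by MUMPS)
$     MatSolve(F,b,x);
  The same controls are available from the options database (e.g. -mat_mumps_icntl_7 5 -mat_mumps_cntl_1 0.01) when MUMPS is selected
  through -pc_factor_mat_solver_type mumps as shown above.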
2979 2980 .seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCGetFactor(), PCFactorGetMatrix() 2981 2982 M*/ 2983 2984 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type) 2985 { 2986 PetscFunctionBegin; 2987 *type = MATSOLVERMUMPS; 2988 PetscFunctionReturn(0); 2989 } 2990 2991 /* MatGetFactor for Seq and MPI AIJ matrices */ 2992 static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F) 2993 { 2994 Mat B; 2995 PetscErrorCode ierr; 2996 Mat_MUMPS *mumps; 2997 PetscBool isSeqAIJ; 2998 PetscMPIInt size; 2999 3000 PetscFunctionBegin; 3001 #if defined(PETSC_USE_COMPLEX) 3002 if (A->hermitian && !A->symmetric && ftype == MAT_FACTOR_CHOLESKY) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported"); 3003 #endif 3004 /* Create the factorization matrix */ 3005 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr); 3006 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3007 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3008 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 3009 ierr = MatSetUp(B);CHKERRQ(ierr); 3010 3011 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 3012 3013 B->ops->view = MatView_MUMPS; 3014 B->ops->getinfo = MatGetInfo_MUMPS; 3015 3016 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 3017 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 3018 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 3019 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 3020 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 3021 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 3022 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 3023 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 3024 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 3025 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 3026 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 3027 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 3028 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 3029 3030 if (ftype == MAT_FACTOR_LU) { 3031 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3032 B->factortype = MAT_FACTOR_LU; 3033 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 3034 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 3035 mumps->sym = 0; 3036 } else { 3037 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3038 B->factortype = MAT_FACTOR_CHOLESKY; 3039 if (isSeqAIJ) mumps->ConvertToTriples = 
MatConvertToTriples_seqaij_seqsbaij; 3040 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 3041 #if defined(PETSC_USE_COMPLEX) 3042 mumps->sym = 2; 3043 #else 3044 if (A->spd_set && A->spd) mumps->sym = 1; 3045 else mumps->sym = 2; 3046 #endif 3047 } 3048 3049 /* set solvertype */ 3050 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 3051 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 3052 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 3053 if (size == 1) { 3054 /* MUMPS option can use ordering with "-mat_mumps_icntl_7 1 when sequential so generate the ordering, even if it may not be used */ 3055 B->useordering = PETSC_TRUE; 3056 } 3057 3058 B->ops->destroy = MatDestroy_MUMPS; 3059 B->data = (void*)mumps; 3060 3061 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 3062 3063 *F = B; 3064 PetscFunctionReturn(0); 3065 } 3066 3067 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 3068 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F) 3069 { 3070 Mat B; 3071 PetscErrorCode ierr; 3072 Mat_MUMPS *mumps; 3073 PetscBool isSeqSBAIJ; 3074 PetscMPIInt size; 3075 3076 PetscFunctionBegin; 3077 #if defined(PETSC_USE_COMPLEX) 3078 if (A->hermitian && !A->symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported"); 3079 #endif 3080 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3081 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3082 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 3083 ierr = MatSetUp(B);CHKERRQ(ierr); 3084 3085 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 3086 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr); 3087 if (isSeqSBAIJ) { 3088 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 3089 } else { 3090 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 3091 } 3092 3093 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 3094 B->ops->view = MatView_MUMPS; 3095 B->ops->getinfo = MatGetInfo_MUMPS; 3096 3097 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 3098 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 3099 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 3100 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 3101 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 3102 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 3103 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 3104 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 3105 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 3106 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 3107 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 3108 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 3109 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 3110 3111 B->factortype = MAT_FACTOR_CHOLESKY; 3112 #if defined(PETSC_USE_COMPLEX) 3113 mumps->sym = 2; 3114 #else 3115 if (A->spd_set && A->spd) mumps->sym = 1; 3116 else mumps->sym = 2; 3117 #endif 3118 3119 /* set solvertype */ 3120 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 3121 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 3122 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 3123 if (size == 1) { 3124 /* MUMPS option can use ordering with "-mat_mumps_icntl_7 1 when sequential so generate the ordering, even if it may not be used */ 3125 B->useordering = PETSC_TRUE; 3126 } 3127 3128 B->ops->destroy = MatDestroy_MUMPS; 3129 B->data = (void*)mumps; 3130 3131 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 3132 3133 *F = B; 3134 PetscFunctionReturn(0); 3135 } 3136 3137 static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F) 3138 { 3139 Mat B; 3140 PetscErrorCode ierr; 3141 Mat_MUMPS *mumps; 3142 PetscBool isSeqBAIJ; 3143 PetscMPIInt size; 3144 3145 PetscFunctionBegin; 3146 /* Create the factorization matrix */ 3147 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr); 3148 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3149 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3150 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 3151 ierr = MatSetUp(B);CHKERRQ(ierr); 3152 3153 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 3154 if (ftype == MAT_FACTOR_LU) { 3155 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 3156 B->factortype = MAT_FACTOR_LU; 3157 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 3158 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 3159 mumps->sym = 0; 3160 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead\n"); 3161 3162 B->ops->view = MatView_MUMPS; 3163 B->ops->getinfo = MatGetInfo_MUMPS; 3164 3165 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 3166 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 3167 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 3168 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 3169 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 3170 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 3171 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 3172 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 3173 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 3174 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 3175 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 3176 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr); 3177 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr); 3178 3179 /* set solvertype */ 3180 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 3181 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 3182 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 3183 if (size == 1) { 3184 /* MUMPS option can use ordering with "-mat_mumps_icntl_7 1 when sequential so generate the ordering, even if it may not be used */ 3185 B->useordering = PETSC_TRUE; 3186 } 3187 3188 B->ops->destroy = MatDestroy_MUMPS; 3189 B->data = (void*)mumps; 3190 3191 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 3192 3193 *F = B; 3194 PetscFunctionReturn(0); 3195 } 3196 3197 /* MatGetFactor for Seq and MPI SELL matrices */ 3198 static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F) 3199 { 3200 Mat B; 3201 PetscErrorCode ierr; 3202 Mat_MUMPS *mumps; 3203 PetscBool isSeqSELL; 3204 PetscMPIInt size; 3205 3206 PetscFunctionBegin; 3207 /* Create the factorization matrix */ 3208 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);CHKERRQ(ierr); 3209 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3210 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3211 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr); 3212 ierr = MatSetUp(B);CHKERRQ(ierr); 3213 3214 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr); 3215 3216 B->ops->view = MatView_MUMPS; 3217 B->ops->getinfo = MatGetInfo_MUMPS; 3218 3219 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr); 3220 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr); 3221 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr); 3222 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr); 3223 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr); 3224 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr); 3225 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr); 3226 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr); 3227 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr); 3228 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr); 3229 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr); 3230 3231 if (ftype == MAT_FACTOR_LU) { 3232 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 3233 B->factortype = MAT_FACTOR_LU; 3234 if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 3235 else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 3236 mumps->sym = 0; 3237 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented"); 3238 3239 /* set solvertype */ 3240 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 3241 ierr = 
PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr); 3242 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 3243 if (size == 1) { 3244 /* MUMPS option can use ordering with "-mat_mumps_icntl_7 1 when sequential so generate the ordering, even if it may not be used */ 3245 B->useordering = PETSC_TRUE; 3246 } 3247 3248 B->ops->destroy = MatDestroy_MUMPS; 3249 B->data = (void*)mumps; 3250 3251 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr); 3252 3253 *F = B; 3254 PetscFunctionReturn(0); 3255 } 3256 3257 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 3258 { 3259 PetscErrorCode ierr; 3260 3261 PetscFunctionBegin; 3262 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 3263 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 3264 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 3265 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 3266 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 3267 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr); 3268 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr); 3269 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr); 3270 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr); 3271 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr); 3272 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);CHKERRQ(ierr); 3273 PetscFunctionReturn(0); 3274 } 3275 3276
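
/*
   Illustrative sketch (not part of the PETSc manual pages): one possible way to use the MatMumpsGetInverse()
   interface implemented above to obtain the diagonal entries of inv(A) on a single MPI rank. Here F is assumed
   to be an existing MUMPS factor of an n-by-n matrix A, obtained with MatGetFactor(), MatLUFactorSymbolic() and
   MatLUFactorNumeric() or through PC; the value 1.0 is only a placeholder marking the requested positions, and
   error checking is omitted. In parallel, spRHST would instead be a MATMPIAIJ matrix whose entries all reside
   on rank 0, as described in the MatMumpsGetInverse() manual page.

      Mat      spRHST,spRHS;
      PetscInt i;

      MatCreateSeqAIJ(PETSC_COMM_SELF,n,n,1,NULL,&spRHST);
      for (i=0; i<n; i++) MatSetValue(spRHST,i,i,1.0,INSERT_VALUES);      (mark the wanted entries of inv(A))
      MatAssemblyBegin(spRHST,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(spRHST,MAT_FINAL_ASSEMBLY);
      MatCreateTranspose(spRHST,&spRHS);                                  (view spRHST as spRHS = spRHST^T without forming it)
      MatMumpsGetInverse(F,spRHS);                                        (requested entries of inv(A) are computed)
      MatDestroy(&spRHS);

   After the call, the computed entries overwrite the values stored in spRHST (they can be examined with, e.g.,
   MatView()); see the MUMPS Users' Guide (ICNTL(30)) for the exact sparse right-hand-side convention.
   MatMumpsGetInverseTranspose() can be used in the same way, passing the MATAIJ matrix spRHST directly.
*/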