1 /*$Id: mumps.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/ 2 /* 3 Provides an interface to the MUMPS_4.3.1 sparse solver 4 */ 5 #include "src/mat/impls/aij/seq/aij.h" 6 #include "src/mat/impls/aij/mpi/mpiaij.h" 7 #include "src/mat/impls/sbaij/seq/sbaij.h" 8 #include "src/mat/impls/sbaij/mpi/mpisbaij.h" 9 10 EXTERN_C_BEGIN 11 #if defined(PETSC_USE_COMPLEX) 12 #include "zmumps_c.h" 13 #else 14 #include "dmumps_c.h" 15 #endif 16 EXTERN_C_END 17 #define JOB_INIT -1 18 #define JOB_END -2 19 /* macros s.t. indices match MUMPS documentation */ 20 #define ICNTL(I) icntl[(I)-1] 21 #define CNTL(I) cntl[(I)-1] 22 #define INFOG(I) infog[(I)-1] 23 #define INFO(I) info[(I)-1] 24 #define RINFOG(I) rinfog[(I)-1] 25 #define RINFO(I) rinfo[(I)-1] 26 27 typedef struct { 28 #if defined(PETSC_USE_COMPLEX) 29 ZMUMPS_STRUC_C id; 30 #else 31 DMUMPS_STRUC_C id; 32 #endif 33 MatStructure matstruc; 34 int myid,size,*irn,*jcn,sym; 35 PetscScalar *val; 36 MPI_Comm comm_mumps; 37 38 PetscTruth isAIJ,CleanUpMUMPS; 39 int (*MatDuplicate)(Mat,MatDuplicateOption,Mat*); 40 int (*MatView)(Mat,PetscViewer); 41 int (*MatAssemblyEnd)(Mat,MatAssemblyType); 42 int (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*); 43 int (*MatCholeskyFactorSymbolic)(Mat,IS,MatFactorInfo*,Mat*); 44 int (*MatDestroy)(Mat); 45 int (*specialdestroy)(Mat); 46 int (*MatPreallocate)(Mat,int,int,int*,int,int*); 47 } Mat_MUMPS; 48 49 EXTERN int MatDuplicate_AIJMUMPS(Mat,MatDuplicateOption,Mat*); 50 EXTERN int MatDuplicate_SBAIJMUMPS(Mat,MatDuplicateOption,Mat*); 51 EXTERN int MatConvert_SBAIJ_SBAIJMUMPS(Mat,const MatType,Mat*); 52 /* convert Petsc mpiaij matrix to triples: row[nz], col[nz], val[nz] */ 53 /* 54 input: 55 A - matrix in mpiaij or mpisbaij (bs=1) format 56 shift - 0: C style output triple; 1: Fortran style output triple. 57 valOnly - FALSE: spaces are allocated and values are set for the triple 58 TRUE: only the values in v array are updated 59 output: 60 nnz - dim of r, c, and v (number of local nonzero entries of A) 61 r, c, v - row and col index, matrix values (matrix triples) 62 */ 63 int MatConvertToTriples(Mat A,int shift,PetscTruth valOnly,int *nnz,int **r, int **c, PetscScalar **v) { 64 int *ai, *aj, *bi, *bj, rstart,nz, *garray; 65 int ierr,i,j,jj,jB,irow,m=A->m,*ajj,*bjj,countA,countB,colA_start,jcol; 66 int *row,*col; 67 PetscScalar *av, *bv,*val; 68 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 69 70 PetscFunctionBegin; 71 if (mumps->isAIJ){ 72 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 73 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)(mat->A)->data; 74 Mat_SeqAIJ *bb=(Mat_SeqAIJ*)(mat->B)->data; 75 nz = aa->nz + bb->nz; 76 ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= mat->rstart; 77 garray = mat->garray; 78 av=aa->a; bv=bb->a; 79 80 } else { 81 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data; 82 Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)(mat->A)->data; 83 Mat_SeqBAIJ *bb=(Mat_SeqBAIJ*)(mat->B)->data; 84 if (mat->bs > 1) SETERRQ1(PETSC_ERR_SUP," bs=%d is not supported yet\n", mat->bs); 85 nz = aa->nz + bb->nz; 86 ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= mat->rstart; 87 garray = mat->garray; 88 av=aa->a; bv=bb->a; 89 } 90 91 if (!valOnly){ 92 ierr = PetscMalloc(nz*sizeof(int),&row);CHKERRQ(ierr); 93 ierr = PetscMalloc(nz*sizeof(int),&col);CHKERRQ(ierr); 94 ierr = PetscMalloc(nz*sizeof(PetscScalar),&val);CHKERRQ(ierr); 95 *r = row; *c = col; *v = val; 96 } else { 97 row = *r; col = *c; val = *v; 98 } 99 *nnz = nz; 100 101 jj = 0; irow = rstart; 102 for ( i=0; i<m; i++ ) { 103 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 104 countA = ai[i+1] - ai[i]; 105 countB = bi[i+1] - bi[i]; 106 bjj = bj + bi[i]; 107 108 /* get jB, the starting local col index for the 2nd B-part */ 109 colA_start = rstart + ajj[0]; /* the smallest col index for A */ 110 j=-1; 111 do { 112 j++; 113 if (j == countB) break; 114 jcol = garray[bjj[j]]; 115 } while (jcol < colA_start); 116 jB = j; 117 118 /* B-part, smaller col index */ 119 colA_start = rstart + ajj[0]; /* the smallest col index for A */ 120 for (j=0; j<jB; j++){ 121 jcol = garray[bjj[j]]; 122 if (!valOnly){ 123 row[jj] = irow + shift; col[jj] = jcol + shift; 124 125 } 126 val[jj++] = *bv++; 127 } 128 /* A-part */ 129 for (j=0; j<countA; j++){ 130 if (!valOnly){ 131 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 132 } 133 val[jj++] = *av++; 134 } 135 /* B-part, larger col index */ 136 for (j=jB; j<countB; j++){ 137 if (!valOnly){ 138 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 139 } 140 val[jj++] = *bv++; 141 } 142 irow++; 143 } 144 145 PetscFunctionReturn(0); 146 } 147 148 EXTERN_C_BEGIN 149 #undef __FUNCT__ 150 #define __FUNCT__ "MatConvert_MUMPS_Base" 151 int MatConvert_MUMPS_Base(Mat A,const MatType type,Mat *newmat) { 152 int ierr; 153 Mat B=*newmat; 154 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 155 156 PetscFunctionBegin; 157 if (B != A) { 158 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 159 } 160 B->ops->duplicate = mumps->MatDuplicate; 161 B->ops->view = mumps->MatView; 162 B->ops->assemblyend = mumps->MatAssemblyEnd; 163 B->ops->lufactorsymbolic = mumps->MatLUFactorSymbolic; 164 B->ops->choleskyfactorsymbolic = mumps->MatCholeskyFactorSymbolic; 165 B->ops->destroy = mumps->MatDestroy; 166 ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr); 167 ierr = PetscFree(mumps);CHKERRQ(ierr); 168 *newmat = B; 169 PetscFunctionReturn(0); 170 } 171 EXTERN_C_END 172 173 #undef __FUNCT__ 174 #define __FUNCT__ "MatDestroy_MUMPS" 175 int MatDestroy_MUMPS(Mat A) { 176 Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr; 177 int ierr,size=lu->size; 178 int (*specialdestroy)(Mat); 179 PetscFunctionBegin; 180 if (lu->CleanUpMUMPS) { 181 /* Terminate instance, deallocate memories */ 182 lu->id.job=JOB_END; 183 #if defined(PETSC_USE_COMPLEX) 184 zmumps_c(&lu->id); 185 #else 186 dmumps_c(&lu->id); 187 #endif 188 if (lu->irn) { 189 ierr = PetscFree(lu->irn);CHKERRQ(ierr); 190 } 191 if (lu->jcn) { 192 ierr = PetscFree(lu->jcn);CHKERRQ(ierr); 193 } 194 if (size>1 && lu->val) { 195 ierr = PetscFree(lu->val);CHKERRQ(ierr); 196 } 197 ierr = MPI_Comm_free(&(lu->comm_mumps));CHKERRQ(ierr); 198 } 199 specialdestroy = lu->specialdestroy; 200 ierr = (*specialdestroy)(A);CHKERRQ(ierr); 201 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 202 PetscFunctionReturn(0); 203 } 204 205 #undef __FUNCT__ 206 #define __FUNCT__ "MatDestroy_AIJMUMPS" 207 int MatDestroy_AIJMUMPS(Mat A) { 208 int ierr, size; 209 210 PetscFunctionBegin; 211 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 212 if (size==1) { 213 ierr = MatConvert_MUMPS_Base(A,MATSEQAIJ,&A);CHKERRQ(ierr); 214 } else { 215 ierr = MatConvert_MUMPS_Base(A,MATMPIAIJ,&A);CHKERRQ(ierr); 216 } 217 PetscFunctionReturn(0); 218 } 219 220 #undef __FUNCT__ 221 #define __FUNCT__ "MatDestroy_SBAIJMUMPS" 222 int MatDestroy_SBAIJMUMPS(Mat A) { 223 int ierr, size; 224 225 PetscFunctionBegin; 226 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 227 if (size==1) { 228 ierr = MatConvert_MUMPS_Base(A,MATSEQSBAIJ,&A);CHKERRQ(ierr); 229 } else { 230 ierr = MatConvert_MUMPS_Base(A,MATMPISBAIJ,&A);CHKERRQ(ierr); 231 } 232 PetscFunctionReturn(0); 233 } 234 235 #undef __FUNCT__ 236 #define __FUNCT__ "MatFactorInfo_MUMPS" 237 int MatFactorInfo_MUMPS(Mat A,PetscViewer viewer) { 238 Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr; 239 int ierr; 240 241 PetscFunctionBegin; 242 ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr); 243 ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",lu->id.sym);CHKERRQ(ierr); 244 ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",lu->id.par);CHKERRQ(ierr); 245 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",lu->id.ICNTL(4));CHKERRQ(ierr); 246 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",lu->id.ICNTL(5));CHKERRQ(ierr); 247 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",lu->id.ICNTL(6));CHKERRQ(ierr); 248 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (matrix ordering): %d \n",lu->id.ICNTL(7));CHKERRQ(ierr); 249 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(9) (A/A^T x=b is solved): %d \n",lu->id.ICNTL(9));CHKERRQ(ierr); 250 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",lu->id.ICNTL(10));CHKERRQ(ierr); 251 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",lu->id.ICNTL(11));CHKERRQ(ierr); 252 if (lu->myid == 0 && lu->id.ICNTL(11)>0) { 253 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(4) (inf norm of input mat): %g\n",lu->id.RINFOG(4));CHKERRQ(ierr); 254 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(5) (inf norm of solution): %g\n",lu->id.RINFOG(5));CHKERRQ(ierr); 255 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(6) (inf norm of residual): %g\n",lu->id.RINFOG(6));CHKERRQ(ierr); 256 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",lu->id.RINFOG(7),lu->id.RINFOG(8));CHKERRQ(ierr); 257 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(9) (error estimate): %g \n",lu->id.RINFOG(9));CHKERRQ(ierr); 258 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",lu->id.RINFOG(10),lu->id.RINFOG(11));CHKERRQ(ierr); 259 260 } 261 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",lu->id.ICNTL(12));CHKERRQ(ierr); 262 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (efficiency control): %d \n",lu->id.ICNTL(13));CHKERRQ(ierr); 263 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",lu->id.ICNTL(14));CHKERRQ(ierr); 264 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(15) (efficiency control): %d \n",lu->id.ICNTL(15));CHKERRQ(ierr); 265 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",lu->id.ICNTL(18));CHKERRQ(ierr); 266 267 ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",lu->id.CNTL(1));CHKERRQ(ierr); 268 ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",lu->id.CNTL(2));CHKERRQ(ierr); 269 ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",lu->id.CNTL(3));CHKERRQ(ierr); 270 271 /* infomation local to each processor */ 272 if (lu->myid == 0) ierr = PetscPrintf(PETSC_COMM_SELF, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr); 273 ierr = PetscSynchronizedPrintf(A->comm," [%d] %g \n",lu->myid,lu->id.RINFO(1));CHKERRQ(ierr); 274 ierr = PetscSynchronizedFlush(A->comm); 275 if (lu->myid == 0) ierr = PetscPrintf(PETSC_COMM_SELF, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr); 276 ierr = PetscSynchronizedPrintf(A->comm," [%d] %g \n",lu->myid,lu->id.RINFO(2));CHKERRQ(ierr); 277 ierr = PetscSynchronizedFlush(A->comm); 278 if (lu->myid == 0) ierr = PetscPrintf(PETSC_COMM_SELF, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr); 279 ierr = PetscSynchronizedPrintf(A->comm," [%d] %g \n",lu->myid,lu->id.RINFO(3));CHKERRQ(ierr); 280 ierr = PetscSynchronizedFlush(A->comm); 281 282 if (lu->myid == 0){ /* information from the host */ 283 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",lu->id.RINFOG(1));CHKERRQ(ierr); 284 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",lu->id.RINFOG(2));CHKERRQ(ierr); 285 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",lu->id.RINFOG(3));CHKERRQ(ierr); 286 287 ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(3));CHKERRQ(ierr); 288 ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(4));CHKERRQ(ierr); 289 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",lu->id.INFOG(5));CHKERRQ(ierr); 290 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",lu->id.INFOG(6));CHKERRQ(ierr); 291 ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively uese after analysis): %d \n",lu->id.INFOG(7));CHKERRQ(ierr); 292 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",lu->id.INFOG(8));CHKERRQ(ierr); 293 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real space store the matrix factors after analysis): %d \n",lu->id.INFOG(9));CHKERRQ(ierr); 294 ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after analysis): %d \n",lu->id.INFOG(10));CHKERRQ(ierr); 295 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix): %d \n",lu->id.INFOG(11));CHKERRQ(ierr); 296 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",lu->id.INFOG(12));CHKERRQ(ierr); 297 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",lu->id.INFOG(13));CHKERRQ(ierr); 298 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",lu->id.INFOG(14));CHKERRQ(ierr); 299 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",lu->id.INFOG(15));CHKERRQ(ierr); 300 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in million of bytes) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",lu->id.INFOG(16));CHKERRQ(ierr); 301 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",lu->id.INFOG(17));CHKERRQ(ierr); 302 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",lu->id.INFOG(18));CHKERRQ(ierr); 303 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",lu->id.INFOG(19));CHKERRQ(ierr); 304 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",lu->id.INFOG(20));CHKERRQ(ierr); 305 } 306 307 PetscFunctionReturn(0); 308 } 309 310 #undef __FUNCT__ 311 #define __FUNCT__ "MatView_AIJMUMPS" 312 int MatView_AIJMUMPS(Mat A,PetscViewer viewer) { 313 int ierr; 314 PetscTruth isascii; 315 PetscViewerFormat format; 316 Mat_MUMPS *mumps=(Mat_MUMPS*)(A->spptr); 317 318 PetscFunctionBegin; 319 ierr = (*mumps->MatView)(A,viewer);CHKERRQ(ierr); 320 321 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 322 if (isascii) { 323 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 324 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 325 ierr = MatFactorInfo_MUMPS(A,viewer);CHKERRQ(ierr); 326 } 327 } 328 PetscFunctionReturn(0); 329 } 330 331 #undef __FUNCT__ 332 #define __FUNCT__ "MatSolve_AIJMUMPS" 333 int MatSolve_AIJMUMPS(Mat A,Vec b,Vec x) { 334 Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr; 335 PetscScalar *array; 336 Vec x_seq; 337 IS iden; 338 VecScatter scat; 339 int ierr; 340 341 PetscFunctionBegin; 342 if (lu->size > 1){ 343 if (!lu->myid){ 344 ierr = VecCreateSeq(PETSC_COMM_SELF,A->N,&x_seq);CHKERRQ(ierr); 345 ierr = ISCreateStride(PETSC_COMM_SELF,A->N,0,1,&iden);CHKERRQ(ierr); 346 } else { 347 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&x_seq);CHKERRQ(ierr); 348 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&iden);CHKERRQ(ierr); 349 } 350 ierr = VecScatterCreate(b,iden,x_seq,iden,&scat);CHKERRQ(ierr); 351 ierr = ISDestroy(iden);CHKERRQ(ierr); 352 353 ierr = VecScatterBegin(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 354 ierr = VecScatterEnd(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 355 if (!lu->myid) {ierr = VecGetArray(x_seq,&array);CHKERRQ(ierr);} 356 } else { /* size == 1 */ 357 ierr = VecCopy(b,x);CHKERRQ(ierr); 358 ierr = VecGetArray(x,&array);CHKERRQ(ierr); 359 } 360 if (!lu->myid) { /* define rhs on the host */ 361 #if defined(PETSC_USE_COMPLEX) 362 lu->id.rhs = (mumps_double_complex*)array; 363 #else 364 lu->id.rhs = array; 365 #endif 366 } 367 368 /* solve phase */ 369 lu->id.job=3; 370 #if defined(PETSC_USE_COMPLEX) 371 zmumps_c(&lu->id); 372 #else 373 dmumps_c(&lu->id); 374 #endif 375 if (lu->id.INFOG(1) < 0) { 376 SETERRQ1(1,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",lu->id.INFOG(1)); 377 } 378 379 /* convert mumps solution x_seq to petsc mpi x */ 380 if (lu->size > 1) { 381 if (!lu->myid){ 382 ierr = VecRestoreArray(x_seq,&array);CHKERRQ(ierr); 383 } 384 ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 385 ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 386 ierr = VecScatterDestroy(scat);CHKERRQ(ierr); 387 ierr = VecDestroy(x_seq);CHKERRQ(ierr); 388 } else { 389 ierr = VecRestoreArray(x,&array);CHKERRQ(ierr); 390 } 391 392 PetscFunctionReturn(0); 393 } 394 395 /* 396 input: 397 F: numeric factor 398 output: 399 nneg: total number of negative pivots 400 nzero: 0 401 npos: (global dimension of F) - nneg 402 */ 403 404 #undef __FUNCT__ 405 #define __FUNCT__ "MatGetInertia_SBAIJMUMPS" 406 int MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos) 407 { 408 Mat_MUMPS *lu =(Mat_MUMPS*)F->spptr; 409 int ierr,neg,zero,pos,size; 410 411 PetscFunctionBegin; 412 ierr = MPI_Comm_size(F->comm,&size);CHKERRQ(ierr); 413 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 414 if (size > 1 && lu->id.ICNTL(13) != 1){ 415 SETERRQ1(1,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",lu->id.INFOG(13)); 416 } 417 if (nneg){ 418 if (!lu->myid){ 419 *nneg = lu->id.INFOG(12); 420 } 421 ierr = MPI_Bcast(nneg,1,MPI_INT,0,lu->comm_mumps);CHKERRQ(ierr); 422 } 423 if (nzero) *nzero = 0; 424 if (npos) *npos = F->M - (*nneg); 425 PetscFunctionReturn(0); 426 } 427 428 #undef __FUNCT__ 429 #define __FUNCT__ "MatFactorNumeric_MPIAIJMUMPS" 430 int MatFactorNumeric_AIJMUMPS(Mat A,Mat *F) { 431 Mat_MUMPS *lu =(Mat_MUMPS*)(*F)->spptr; 432 Mat_MUMPS *lua=(Mat_MUMPS*)(A)->spptr; 433 int rnz,nnz,ierr,nz,i,M=A->M,*ai,*aj,icntl; 434 PetscTruth valOnly,flg; 435 436 PetscFunctionBegin; 437 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ 438 (*F)->ops->solve = MatSolve_AIJMUMPS; 439 440 /* Initialize a MUMPS instance */ 441 ierr = MPI_Comm_rank(A->comm, &lu->myid); 442 ierr = MPI_Comm_size(A->comm,&lu->size);CHKERRQ(ierr); 443 lua->myid = lu->myid; lua->size = lu->size; 444 lu->id.job = JOB_INIT; 445 ierr = MPI_Comm_dup(A->comm,&(lu->comm_mumps));CHKERRQ(ierr); 446 lu->id.comm_fortran = lu->comm_mumps; 447 448 /* Set mumps options */ 449 ierr = PetscOptionsBegin(A->comm,A->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 450 lu->id.par=1; /* host participates factorizaton and solve */ 451 lu->id.sym=lu->sym; 452 if (lu->sym == 2){ 453 ierr = PetscOptionsInt("-mat_mumps_sym","SYM: (1,2)","None",lu->id.sym,&icntl,&flg);CHKERRQ(ierr); 454 if (flg && icntl == 1) lu->id.sym=icntl; /* matrix is spd */ 455 } 456 #if defined(PETSC_USE_COMPLEX) 457 zmumps_c(&lu->id); 458 #else 459 dmumps_c(&lu->id); 460 #endif 461 462 if (lu->size == 1){ 463 lu->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 464 } else { 465 lu->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 466 } 467 468 icntl=-1; 469 ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",lu->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr); 470 if (flg && icntl > 0) { 471 lu->id.ICNTL(4)=icntl; /* and use mumps default icntl(i), i=1,2,3 */ 472 } else { /* no output */ 473 lu->id.ICNTL(1) = 0; /* error message, default= 6 */ 474 lu->id.ICNTL(2) = -1; /* output stream for diagnostic printing, statistics, and warning. default=0 */ 475 lu->id.ICNTL(3) = -1; /* output stream for global information, default=6 */ 476 lu->id.ICNTL(4) = 0; /* level of printing, 0,1,2,3,4, default=2 */ 477 } 478 ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): matrix prescaling (0 to 7)","None",lu->id.ICNTL(6),&lu->id.ICNTL(6),PETSC_NULL);CHKERRQ(ierr); 479 icntl=-1; 480 ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): matrix ordering (0 to 7)","None",lu->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr); 481 if (flg) { 482 if (icntl== 1){ 483 SETERRQ(PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n"); 484 } else { 485 lu->id.ICNTL(7) = icntl; 486 } 487 } 488 ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): A or A^T x=b to be solved. 1: A; otherwise: A^T","None",lu->id.ICNTL(9),&lu->id.ICNTL(9),PETSC_NULL);CHKERRQ(ierr); 489 ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",lu->id.ICNTL(10),&lu->id.ICNTL(10),PETSC_NULL);CHKERRQ(ierr); 490 ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): error analysis, a positive value returns statistics (by -ksp_view)","None",lu->id.ICNTL(11),&lu->id.ICNTL(11),PETSC_NULL);CHKERRQ(ierr); 491 ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): efficiency control","None",lu->id.ICNTL(12),&lu->id.ICNTL(12),PETSC_NULL);CHKERRQ(ierr); 492 ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): efficiency control","None",lu->id.ICNTL(13),&lu->id.ICNTL(13),PETSC_NULL);CHKERRQ(ierr); 493 ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage of estimated workspace increase","None",lu->id.ICNTL(14),&lu->id.ICNTL(14),PETSC_NULL);CHKERRQ(ierr); 494 ierr = PetscOptionsInt("-mat_mumps_icntl_15","ICNTL(15): efficiency control","None",lu->id.ICNTL(15),&lu->id.ICNTL(15),PETSC_NULL);CHKERRQ(ierr); 495 496 /* 497 ierr = PetscOptionsInt("-mat_mumps_icntl_16","ICNTL(16): 1: rank detection; 2: rank detection and nullspace","None",lu->id.ICNTL(16),&icntl,&flg);CHKERRQ(ierr); 498 if (flg){ 499 if (icntl >-1 && icntl <3 ){ 500 if (lu->myid==0) lu->id.ICNTL(16) = icntl; 501 } else { 502 SETERRQ1(PETSC_ERR_SUP,"ICNTL(16)=%d -- not supported\n",icntl); 503 } 504 } 505 */ 506 507 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",lu->id.CNTL(1),&lu->id.CNTL(1),PETSC_NULL);CHKERRQ(ierr); 508 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",lu->id.CNTL(2),&lu->id.CNTL(2),PETSC_NULL);CHKERRQ(ierr); 509 ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",lu->id.CNTL(3),&lu->id.CNTL(3),PETSC_NULL);CHKERRQ(ierr); 510 PetscOptionsEnd(); 511 } 512 513 /* define matrix A */ 514 switch (lu->id.ICNTL(18)){ 515 case 0: /* centralized assembled matrix input (size=1) */ 516 if (!lu->myid) { 517 if (lua->isAIJ){ 518 Mat_SeqAIJ *aa = (Mat_SeqAIJ*)A->data; 519 nz = aa->nz; 520 ai = aa->i; aj = aa->j; lu->val = aa->a; 521 } else { 522 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)A->data; 523 nz = aa->nz; 524 ai = aa->i; aj = aa->j; lu->val = aa->a; 525 } 526 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization, get irn and jcn */ 527 ierr = PetscMalloc(nz*sizeof(int),&lu->irn);CHKERRQ(ierr); 528 ierr = PetscMalloc(nz*sizeof(int),&lu->jcn);CHKERRQ(ierr); 529 nz = 0; 530 for (i=0; i<M; i++){ 531 rnz = ai[i+1] - ai[i]; 532 while (rnz--) { /* Fortran row/col index! */ 533 lu->irn[nz] = i+1; lu->jcn[nz] = (*aj)+1; aj++; nz++; 534 } 535 } 536 } 537 } 538 break; 539 case 3: /* distributed assembled matrix input (size>1) */ 540 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ 541 valOnly = PETSC_FALSE; 542 } else { 543 valOnly = PETSC_TRUE; /* only update mat values, not row and col index */ 544 } 545 ierr = MatConvertToTriples(A,1,valOnly, &nnz, &lu->irn, &lu->jcn, &lu->val);CHKERRQ(ierr); 546 break; 547 default: SETERRQ(PETSC_ERR_SUP,"Matrix input format is not supported by MUMPS."); 548 } 549 550 /* analysis phase */ 551 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ 552 lu->id.n = M; 553 switch (lu->id.ICNTL(18)){ 554 case 0: /* centralized assembled matrix input */ 555 if (!lu->myid) { 556 lu->id.nz =nz; lu->id.irn=lu->irn; lu->id.jcn=lu->jcn; 557 if (lu->id.ICNTL(6)>1){ 558 #if defined(PETSC_USE_COMPLEX) 559 lu->id.a = (mumps_double_complex*)lu->val; 560 #else 561 lu->id.a = lu->val; 562 #endif 563 } 564 } 565 break; 566 case 3: /* distributed assembled matrix input (size>1) */ 567 lu->id.nz_loc = nnz; 568 lu->id.irn_loc=lu->irn; lu->id.jcn_loc=lu->jcn; 569 if (lu->id.ICNTL(6)>1) { 570 #if defined(PETSC_USE_COMPLEX) 571 lu->id.a_loc = (mumps_double_complex*)lu->val; 572 #else 573 lu->id.a_loc = lu->val; 574 #endif 575 } 576 break; 577 } 578 lu->id.job=1; 579 #if defined(PETSC_USE_COMPLEX) 580 zmumps_c(&lu->id); 581 #else 582 dmumps_c(&lu->id); 583 #endif 584 if (lu->id.INFOG(1) < 0) { 585 SETERRQ1(1,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",lu->id.INFOG(1)); 586 } 587 } 588 589 /* numerical factorization phase */ 590 if(lu->id.ICNTL(18) == 0) { 591 if (!lu->myid) { 592 #if defined(PETSC_USE_COMPLEX) 593 lu->id.a = (mumps_double_complex*)lu->val; 594 #else 595 lu->id.a = lu->val; 596 #endif 597 } 598 } else { 599 #if defined(PETSC_USE_COMPLEX) 600 lu->id.a_loc = (mumps_double_complex*)lu->val; 601 #else 602 lu->id.a_loc = lu->val; 603 #endif 604 } 605 lu->id.job=2; 606 #if defined(PETSC_USE_COMPLEX) 607 zmumps_c(&lu->id); 608 #else 609 dmumps_c(&lu->id); 610 #endif 611 if (lu->id.INFOG(1) < 0) { 612 SETERRQ2(1,"Error reported by MUMPS in numerical factorization phase: INFO(1)=%d, INFO(2)=%d\n",lu->id.INFO(1),lu->id.INFO(2)); 613 } 614 615 if (lu->myid==0 && lu->id.ICNTL(16) > 0){ 616 SETERRQ1(1," lu->id.ICNTL(16):=%d\n",lu->id.INFOG(16)); 617 } 618 619 (*F)->assembled = PETSC_TRUE; 620 lu->matstruc = SAME_NONZERO_PATTERN; 621 lu->CleanUpMUMPS = PETSC_TRUE; 622 PetscFunctionReturn(0); 623 } 624 625 /* Note the Petsc r and c permutations are ignored */ 626 #undef __FUNCT__ 627 #define __FUNCT__ "MatLUFactorSymbolic_AIJMUMPS" 628 int MatLUFactorSymbolic_AIJMUMPS(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) { 629 Mat B; 630 Mat_MUMPS *lu; 631 int ierr; 632 633 PetscFunctionBegin; 634 635 /* Create the factorization matrix */ 636 ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&B);CHKERRQ(ierr); 637 ierr = MatSetType(B,MATAIJMUMPS);CHKERRQ(ierr); 638 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 639 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 640 641 B->ops->lufactornumeric = MatFactorNumeric_AIJMUMPS; 642 B->factor = FACTOR_LU; 643 lu = (Mat_MUMPS*)B->spptr; 644 lu->sym = 0; 645 lu->matstruc = DIFFERENT_NONZERO_PATTERN; 646 647 *F = B; 648 PetscFunctionReturn(0); 649 } 650 651 /* Note the Petsc r permutation is ignored */ 652 #undef __FUNCT__ 653 #define __FUNCT__ "MatCholeskyFactorSymbolic_SBAIJMUMPS" 654 int MatCholeskyFactorSymbolic_SBAIJMUMPS(Mat A,IS r,MatFactorInfo *info,Mat *F) { 655 Mat B; 656 Mat_MUMPS *lu; 657 int ierr; 658 659 PetscFunctionBegin; 660 661 /* Create the factorization matrix */ 662 ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&B);CHKERRQ(ierr); 663 ierr = MatSetType(B,MATAIJMUMPS);CHKERRQ(ierr); 664 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 665 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 666 667 B->ops->choleskyfactornumeric = MatFactorNumeric_AIJMUMPS; 668 B->ops->getinertia = MatGetInertia_SBAIJMUMPS; 669 B->factor = FACTOR_CHOLESKY; 670 lu = (Mat_MUMPS*)B->spptr; 671 lu->sym = 2; 672 lu->matstruc = DIFFERENT_NONZERO_PATTERN; 673 674 *F = B; 675 PetscFunctionReturn(0); 676 } 677 678 #undef __FUNCT__ 679 #define __FUNCT__ "MatAssemblyEnd_AIJMUMPS" 680 int MatAssemblyEnd_AIJMUMPS(Mat A,MatAssemblyType mode) { 681 int ierr; 682 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 683 684 PetscFunctionBegin; 685 ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 686 687 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 688 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 689 A->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 690 PetscFunctionReturn(0); 691 } 692 693 EXTERN_C_BEGIN 694 #undef __FUNCT__ 695 #define __FUNCT__ "MatConvert_AIJ_AIJMUMPS" 696 int MatConvert_AIJ_AIJMUMPS(Mat A,const MatType newtype,Mat *newmat) { 697 int ierr,size; 698 MPI_Comm comm; 699 Mat B=*newmat; 700 Mat_MUMPS *mumps; 701 702 PetscFunctionBegin; 703 if (B != A) { 704 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 705 } 706 707 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 708 ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr); 709 710 mumps->MatDuplicate = A->ops->duplicate; 711 mumps->MatView = A->ops->view; 712 mumps->MatAssemblyEnd = A->ops->assemblyend; 713 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 714 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 715 mumps->MatDestroy = A->ops->destroy; 716 mumps->specialdestroy = MatDestroy_AIJMUMPS; 717 mumps->CleanUpMUMPS = PETSC_FALSE; 718 mumps->isAIJ = PETSC_TRUE; 719 720 B->spptr = (void *)mumps; 721 B->ops->duplicate = MatDuplicate_AIJMUMPS; 722 B->ops->view = MatView_AIJMUMPS; 723 B->ops->assemblyend = MatAssemblyEnd_AIJMUMPS; 724 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 725 B->ops->destroy = MatDestroy_MUMPS; 726 727 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 728 if (size == 1) { 729 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_aijmumps_C", 730 "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr); 731 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_seqaij_C", 732 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 733 } else { 734 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_aijmumps_C", 735 "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr); 736 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_mpiaij_C", 737 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 738 } 739 740 PetscLogInfo(0,"Using MUMPS for LU factorization and solves."); 741 ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr); 742 *newmat = B; 743 PetscFunctionReturn(0); 744 } 745 EXTERN_C_END 746 747 #undef __FUNCT__ 748 #define __FUNCT__ "MatDuplicate_AIJMUMPS" 749 int MatDuplicate_AIJMUMPS(Mat A, MatDuplicateOption op, Mat *M) { 750 int ierr; 751 Mat_MUMPS *lu=(Mat_MUMPS *)A->spptr; 752 753 PetscFunctionBegin; 754 ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr); 755 ierr = MatConvert_AIJ_AIJMUMPS(*M,MATAIJMUMPS,M);CHKERRQ(ierr); 756 ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_MUMPS));CHKERRQ(ierr); 757 PetscFunctionReturn(0); 758 } 759 760 /*MC 761 MATAIJMUMPS - MATAIJMUMPS = "aijmumps" - A matrix type providing direct solvers (LU) for distributed 762 and sequential matrices via the external package MUMPS. 763 764 If MUMPS is installed (see the manual for instructions 765 on how to declare the existence of external packages), 766 a matrix type can be constructed which invokes MUMPS solvers. 767 After calling MatCreate(...,A), simply call MatSetType(A,MATAIJMUMPS). 768 This matrix type is only supported for double precision real. 769 770 If created with a single process communicator, this matrix type inherits from MATSEQAIJ. 771 Otherwise, this matrix type inherits from MATMPIAIJ. Hence for single process communicators, 772 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 773 for communicators controlling multiple processes. It is recommended that you call both of 774 the above preallocation routines for simplicity. One can also call MatConvert for an inplace 775 conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size) 776 without data copy. 777 778 Options Database Keys: 779 + -mat_type aijmumps - sets the matrix type to "aijmumps" during a call to MatSetFromOptions() 780 . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric 781 . -mat_mumps_icntl_4 <0,1,2,3,4> - print level 782 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide) 783 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide) 784 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T 785 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements 786 . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view 787 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide) 788 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide) 789 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide) 790 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide) 791 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold 792 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement 793 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold 794 795 Level: beginner 796 797 .seealso: MATSBAIJMUMPS 798 M*/ 799 800 EXTERN_C_BEGIN 801 #undef __FUNCT__ 802 #define __FUNCT__ "MatCreate_AIJMUMPS" 803 int MatCreate_AIJMUMPS(Mat A) { 804 int ierr,size; 805 Mat A_diag; 806 MPI_Comm comm; 807 808 PetscFunctionBegin; 809 /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */ 810 /* and AIJMUMPS types */ 811 ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJMUMPS);CHKERRQ(ierr); 812 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 813 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 814 if (size == 1) { 815 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 816 } else { 817 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 818 A_diag = ((Mat_MPIAIJ *)A->data)->A; 819 ierr = MatConvert_AIJ_AIJMUMPS(A_diag,MATAIJMUMPS,&A_diag);CHKERRQ(ierr); 820 } 821 ierr = MatConvert_AIJ_AIJMUMPS(A,MATAIJMUMPS,&A);CHKERRQ(ierr); 822 PetscFunctionReturn(0); 823 } 824 EXTERN_C_END 825 826 #undef __FUNCT__ 827 #define __FUNCT__ "MatAssemblyEnd_SBAIJMUMPS" 828 int MatAssemblyEnd_SBAIJMUMPS(Mat A,MatAssemblyType mode) { 829 int ierr; 830 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 831 832 PetscFunctionBegin; 833 ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 834 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 835 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 836 A->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SBAIJMUMPS; 837 PetscFunctionReturn(0); 838 } 839 840 EXTERN_C_BEGIN 841 #undef __FUNCT__ 842 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS" 843 int MatMPISBAIJSetPreallocation_MPISBAIJMUMPS(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 844 { 845 Mat A; 846 Mat_MUMPS *mumps; 847 int ierr; 848 849 PetscFunctionBegin; 850 /* 851 After performing the MPISBAIJ Preallocation, we need to convert the local diagonal block matrix 852 into MUMPS type so that the block jacobi preconditioner (for example) can use MUMPS. I would 853 like this to be done in the MatCreate routine, but the creation of this inner matrix requires 854 block size info so that PETSc can determine the local size properly. The block size info is set 855 in the preallocation routine. 856 */ 857 ierr = (*mumps->MatPreallocate)(B,bs,d_nz,d_nnz,o_nz,o_nnz); 858 A = ((Mat_MPISBAIJ *)B->data)->A; 859 ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,&A);CHKERRQ(ierr); 860 PetscFunctionReturn(0); 861 } 862 EXTERN_C_END 863 864 EXTERN_C_BEGIN 865 #undef __FUNCT__ 866 #define __FUNCT__ "MatConvert_SBAIJ_SBAIJMUMPS" 867 int MatConvert_SBAIJ_SBAIJMUMPS(Mat A,const MatType newtype,Mat *newmat) { 868 int ierr,size; 869 MPI_Comm comm; 870 Mat B=*newmat; 871 Mat_MUMPS *mumps; 872 void (*f)(void); 873 874 PetscFunctionBegin; 875 if (B != A) { 876 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 877 } 878 879 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 880 ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr); 881 882 mumps->MatDuplicate = A->ops->duplicate; 883 mumps->MatView = A->ops->view; 884 mumps->MatAssemblyEnd = A->ops->assemblyend; 885 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 886 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 887 mumps->MatDestroy = A->ops->destroy; 888 mumps->specialdestroy = MatDestroy_SBAIJMUMPS; 889 mumps->CleanUpMUMPS = PETSC_FALSE; 890 mumps->isAIJ = PETSC_FALSE; 891 892 B->spptr = (void *)mumps; 893 B->ops->duplicate = MatDuplicate_SBAIJMUMPS; 894 B->ops->view = MatView_AIJMUMPS; 895 B->ops->assemblyend = MatAssemblyEnd_SBAIJMUMPS; 896 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SBAIJMUMPS; 897 B->ops->destroy = MatDestroy_MUMPS; 898 899 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 900 if (size == 1) { 901 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C", 902 "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr); 903 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C", 904 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 905 } else { 906 /* I really don't like needing to know the tag: MatMPISBAIJSetPreallocation_C */ 907 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",&f);CHKERRQ(ierr); 908 if (f) { 909 mumps->MatPreallocate = (int (*)(Mat,int,int,int*,int,int*))f; 910 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 911 "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS", 912 MatMPISBAIJSetPreallocation_MPISBAIJMUMPS);CHKERRQ(ierr); 913 } 914 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C", 915 "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr); 916 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C", 917 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 918 } 919 920 PetscLogInfo(0,"Using MUMPS for Cholesky factorization and solves."); 921 ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr); 922 *newmat = B; 923 PetscFunctionReturn(0); 924 } 925 EXTERN_C_END 926 927 #undef __FUNCT__ 928 #define __FUNCT__ "MatDuplicate_SBAIJMUMPS" 929 int MatDuplicate_SBAIJMUMPS(Mat A, MatDuplicateOption op, Mat *M) { 930 int ierr; 931 Mat_MUMPS *lu=(Mat_MUMPS *)A->spptr; 932 933 PetscFunctionBegin; 934 ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr); 935 ierr = MatConvert_SBAIJ_SBAIJMUMPS(*M,MATSBAIJMUMPS,M);CHKERRQ(ierr); 936 ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_MUMPS));CHKERRQ(ierr); 937 PetscFunctionReturn(0); 938 } 939 940 /*MC 941 MATSBAIJMUMPS - MATSBAIJMUMPS = "sbaijmumps" - A symmetric matrix type providing direct solvers (Cholesky) for 942 distributed and sequential matrices via the external package MUMPS. 943 944 If MUMPS is installed (see the manual for instructions 945 on how to declare the existence of external packages), 946 a matrix type can be constructed which invokes MUMPS solvers. 947 After calling MatCreate(...,A), simply call MatSetType(A,MATSBAIJMUMPS). 948 This matrix type is only supported for double precision real. 949 950 If created with a single process communicator, this matrix type inherits from MATSEQSBAIJ. 951 Otherwise, this matrix type inherits from MATMPISBAIJ. Hence for single process communicators, 952 MatSeqSBAIJSetPreallocation is supported, and similarly MatMPISBAIJSetPreallocation is supported 953 for communicators controlling multiple processes. It is recommended that you call both of 954 the above preallocation routines for simplicity. One can also call MatConvert for an inplace 955 conversion to or from the MATSEQSBAIJ or MATMPISBAIJ type (depending on the communicator size) 956 without data copy. 957 958 Options Database Keys: 959 + -mat_type sbaijmumps - sets the matrix type to "sbaijmumps" during a call to MatSetFromOptions() 960 . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric 961 . -mat_mumps_icntl_4 <0,...,4> - print level 962 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide) 963 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide) 964 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T 965 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements 966 . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view 967 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide) 968 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide) 969 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide) 970 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide) 971 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold 972 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement 973 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold 974 975 Level: beginner 976 977 .seealso: MATAIJMUMPS 978 M*/ 979 980 EXTERN_C_BEGIN 981 #undef __FUNCT__ 982 #define __FUNCT__ "MatCreate_SBAIJMUMPS" 983 int MatCreate_SBAIJMUMPS(Mat A) { 984 int ierr,size; 985 986 PetscFunctionBegin; 987 /* Change type name before calling MatSetType to force proper construction of SeqSBAIJ or MPISBAIJ */ 988 /* and SBAIJMUMPS types */ 989 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJMUMPS);CHKERRQ(ierr); 990 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 991 if (size == 1) { 992 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 993 } else { 994 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 995 } 996 ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,&A);CHKERRQ(ierr); 997 PetscFunctionReturn(0); 998 } 999 EXTERN_C_END 1000