1 /*$Id: mumps.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/ 2 /* 3 Provides an interface to the MUMPS_4.3.1 sparse solver 4 */ 5 #include "src/mat/impls/aij/seq/aij.h" 6 #include "src/mat/impls/aij/mpi/mpiaij.h" 7 #include "src/mat/impls/sbaij/seq/sbaij.h" 8 #include "src/mat/impls/sbaij/mpi/mpisbaij.h" 9 10 EXTERN_C_BEGIN 11 #if defined(PETSC_USE_COMPLEX) 12 #include "zmumps_c.h" 13 #else 14 #include "dmumps_c.h" 15 #endif 16 EXTERN_C_END 17 #define JOB_INIT -1 18 #define JOB_END -2 19 /* macros s.t. indices match MUMPS documentation */ 20 #define ICNTL(I) icntl[(I)-1] 21 #define CNTL(I) cntl[(I)-1] 22 #define INFOG(I) infog[(I)-1] 23 #define INFO(I) info[(I)-1] 24 #define RINFOG(I) rinfog[(I)-1] 25 #define RINFO(I) rinfo[(I)-1] 26 27 typedef struct { 28 #if defined(PETSC_USE_COMPLEX) 29 ZMUMPS_STRUC_C id; 30 #else 31 DMUMPS_STRUC_C id; 32 #endif 33 MatStructure matstruc; 34 int myid,size,*irn,*jcn,sym; 35 PetscScalar *val; 36 MPI_Comm comm_mumps; 37 38 PetscTruth isAIJ,CleanUpMUMPS; 39 int (*MatDuplicate)(Mat,MatDuplicateOption,Mat*); 40 int (*MatView)(Mat,PetscViewer); 41 int (*MatAssemblyEnd)(Mat,MatAssemblyType); 42 int (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*); 43 int (*MatCholeskyFactorSymbolic)(Mat,IS,MatFactorInfo*,Mat*); 44 int (*MatDestroy)(Mat); 45 int (*specialdestroy)(Mat); 46 int (*MatPreallocate)(Mat,int,int,int*,int,int*); 47 } Mat_MUMPS; 48 49 EXTERN int MatDuplicate_AIJMUMPS(Mat,MatDuplicateOption,Mat*); 50 EXTERN int MatDuplicate_SBAIJMUMPS(Mat,MatDuplicateOption,Mat*); 51 EXTERN_C_BEGIN 52 int MatConvert_SBAIJ_SBAIJMUMPS(Mat,const MatType,Mat*); 53 EXTERN_C_END 54 /* convert Petsc mpiaij matrix to triples: row[nz], col[nz], val[nz] */ 55 /* 56 input: 57 A - matrix in mpiaij or mpisbaij (bs=1) format 58 shift - 0: C style output triple; 1: Fortran style output triple. 59 valOnly - FALSE: spaces are allocated and values are set for the triple 60 TRUE: only the values in v array are updated 61 output: 62 nnz - dim of r, c, and v (number of local nonzero entries of A) 63 r, c, v - row and col index, matrix values (matrix triples) 64 */ 65 int MatConvertToTriples(Mat A,int shift,PetscTruth valOnly,int *nnz,int **r, int **c, PetscScalar **v) { 66 int *ai, *aj, *bi, *bj, rstart,nz, *garray; 67 int ierr,i,j,jj,jB,irow,m=A->m,*ajj,*bjj,countA,countB,colA_start,jcol; 68 int *row,*col; 69 PetscScalar *av, *bv,*val; 70 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 71 72 PetscFunctionBegin; 73 if (mumps->isAIJ){ 74 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 75 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)(mat->A)->data; 76 Mat_SeqAIJ *bb=(Mat_SeqAIJ*)(mat->B)->data; 77 nz = aa->nz + bb->nz; 78 ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= mat->rstart; 79 garray = mat->garray; 80 av=aa->a; bv=bb->a; 81 82 } else { 83 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data; 84 Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)(mat->A)->data; 85 Mat_SeqBAIJ *bb=(Mat_SeqBAIJ*)(mat->B)->data; 86 if (mat->bs > 1) SETERRQ1(PETSC_ERR_SUP," bs=%d is not supported yet\n", mat->bs); 87 nz = aa->nz + bb->nz; 88 ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= mat->rstart; 89 garray = mat->garray; 90 av=aa->a; bv=bb->a; 91 } 92 93 if (!valOnly){ 94 ierr = PetscMalloc(nz*sizeof(int),&row);CHKERRQ(ierr); 95 ierr = PetscMalloc(nz*sizeof(int),&col);CHKERRQ(ierr); 96 ierr = PetscMalloc(nz*sizeof(PetscScalar),&val);CHKERRQ(ierr); 97 *r = row; *c = col; *v = val; 98 } else { 99 row = *r; col = *c; val = *v; 100 } 101 *nnz = nz; 102 103 jj = 0; irow = rstart; 104 for ( i=0; i<m; i++ ) { 105 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 106 countA = ai[i+1] - ai[i]; 107 countB = bi[i+1] - bi[i]; 108 bjj = bj + bi[i]; 109 110 /* get jB, the starting local col index for the 2nd B-part */ 111 colA_start = rstart + ajj[0]; /* the smallest col index for A */ 112 j=-1; 113 do { 114 j++; 115 if (j == countB) break; 116 jcol = garray[bjj[j]]; 117 } while (jcol < colA_start); 118 jB = j; 119 120 /* B-part, smaller col index */ 121 colA_start = rstart + ajj[0]; /* the smallest col index for A */ 122 for (j=0; j<jB; j++){ 123 jcol = garray[bjj[j]]; 124 if (!valOnly){ 125 row[jj] = irow + shift; col[jj] = jcol + shift; 126 127 } 128 val[jj++] = *bv++; 129 } 130 /* A-part */ 131 for (j=0; j<countA; j++){ 132 if (!valOnly){ 133 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 134 } 135 val[jj++] = *av++; 136 } 137 /* B-part, larger col index */ 138 for (j=jB; j<countB; j++){ 139 if (!valOnly){ 140 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 141 } 142 val[jj++] = *bv++; 143 } 144 irow++; 145 } 146 147 PetscFunctionReturn(0); 148 } 149 150 EXTERN_C_BEGIN 151 #undef __FUNCT__ 152 #define __FUNCT__ "MatConvert_MUMPS_Base" 153 int MatConvert_MUMPS_Base(Mat A,const MatType type,Mat *newmat) { 154 int ierr; 155 Mat B=*newmat; 156 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 157 158 PetscFunctionBegin; 159 if (B != A) { 160 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 161 } 162 B->ops->duplicate = mumps->MatDuplicate; 163 B->ops->view = mumps->MatView; 164 B->ops->assemblyend = mumps->MatAssemblyEnd; 165 B->ops->lufactorsymbolic = mumps->MatLUFactorSymbolic; 166 B->ops->choleskyfactorsymbolic = mumps->MatCholeskyFactorSymbolic; 167 B->ops->destroy = mumps->MatDestroy; 168 ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr); 169 ierr = PetscFree(mumps);CHKERRQ(ierr); 170 *newmat = B; 171 PetscFunctionReturn(0); 172 } 173 EXTERN_C_END 174 175 #undef __FUNCT__ 176 #define __FUNCT__ "MatDestroy_MUMPS" 177 int MatDestroy_MUMPS(Mat A) { 178 Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr; 179 int ierr,size=lu->size; 180 int (*specialdestroy)(Mat); 181 PetscFunctionBegin; 182 if (lu->CleanUpMUMPS) { 183 /* Terminate instance, deallocate memories */ 184 lu->id.job=JOB_END; 185 #if defined(PETSC_USE_COMPLEX) 186 zmumps_c(&lu->id); 187 #else 188 dmumps_c(&lu->id); 189 #endif 190 if (lu->irn) { 191 ierr = PetscFree(lu->irn);CHKERRQ(ierr); 192 } 193 if (lu->jcn) { 194 ierr = PetscFree(lu->jcn);CHKERRQ(ierr); 195 } 196 if (size>1 && lu->val) { 197 ierr = PetscFree(lu->val);CHKERRQ(ierr); 198 } 199 ierr = MPI_Comm_free(&(lu->comm_mumps));CHKERRQ(ierr); 200 } 201 specialdestroy = lu->specialdestroy; 202 ierr = (*specialdestroy)(A);CHKERRQ(ierr); 203 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 204 PetscFunctionReturn(0); 205 } 206 207 #undef __FUNCT__ 208 #define __FUNCT__ "MatDestroy_AIJMUMPS" 209 int MatDestroy_AIJMUMPS(Mat A) { 210 int ierr, size; 211 212 PetscFunctionBegin; 213 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 214 if (size==1) { 215 ierr = MatConvert_MUMPS_Base(A,MATSEQAIJ,&A);CHKERRQ(ierr); 216 } else { 217 ierr = MatConvert_MUMPS_Base(A,MATMPIAIJ,&A);CHKERRQ(ierr); 218 } 219 PetscFunctionReturn(0); 220 } 221 222 #undef __FUNCT__ 223 #define __FUNCT__ "MatDestroy_SBAIJMUMPS" 224 int MatDestroy_SBAIJMUMPS(Mat A) { 225 int ierr, size; 226 227 PetscFunctionBegin; 228 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 229 if (size==1) { 230 ierr = MatConvert_MUMPS_Base(A,MATSEQSBAIJ,&A);CHKERRQ(ierr); 231 } else { 232 ierr = MatConvert_MUMPS_Base(A,MATMPISBAIJ,&A);CHKERRQ(ierr); 233 } 234 PetscFunctionReturn(0); 235 } 236 237 #undef __FUNCT__ 238 #define __FUNCT__ "MatFactorInfo_MUMPS" 239 int MatFactorInfo_MUMPS(Mat A,PetscViewer viewer) { 240 Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr; 241 int ierr; 242 243 PetscFunctionBegin; 244 ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr); 245 ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",lu->id.sym);CHKERRQ(ierr); 246 ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",lu->id.par);CHKERRQ(ierr); 247 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",lu->id.ICNTL(4));CHKERRQ(ierr); 248 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",lu->id.ICNTL(5));CHKERRQ(ierr); 249 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",lu->id.ICNTL(6));CHKERRQ(ierr); 250 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (matrix ordering): %d \n",lu->id.ICNTL(7));CHKERRQ(ierr); 251 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(9) (A/A^T x=b is solved): %d \n",lu->id.ICNTL(9));CHKERRQ(ierr); 252 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",lu->id.ICNTL(10));CHKERRQ(ierr); 253 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",lu->id.ICNTL(11));CHKERRQ(ierr); 254 if (lu->myid == 0 && lu->id.ICNTL(11)>0) { 255 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(4) (inf norm of input mat): %g\n",lu->id.RINFOG(4));CHKERRQ(ierr); 256 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(5) (inf norm of solution): %g\n",lu->id.RINFOG(5));CHKERRQ(ierr); 257 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(6) (inf norm of residual): %g\n",lu->id.RINFOG(6));CHKERRQ(ierr); 258 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",lu->id.RINFOG(7),lu->id.RINFOG(8));CHKERRQ(ierr); 259 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(9) (error estimate): %g \n",lu->id.RINFOG(9));CHKERRQ(ierr); 260 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",lu->id.RINFOG(10),lu->id.RINFOG(11));CHKERRQ(ierr); 261 262 } 263 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",lu->id.ICNTL(12));CHKERRQ(ierr); 264 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (efficiency control): %d \n",lu->id.ICNTL(13));CHKERRQ(ierr); 265 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",lu->id.ICNTL(14));CHKERRQ(ierr); 266 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(15) (efficiency control): %d \n",lu->id.ICNTL(15));CHKERRQ(ierr); 267 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",lu->id.ICNTL(18));CHKERRQ(ierr); 268 269 ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",lu->id.CNTL(1));CHKERRQ(ierr); 270 ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",lu->id.CNTL(2));CHKERRQ(ierr); 271 ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",lu->id.CNTL(3));CHKERRQ(ierr); 272 273 /* infomation local to each processor */ 274 if (lu->myid == 0) ierr = PetscPrintf(PETSC_COMM_SELF, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr); 275 ierr = PetscSynchronizedPrintf(A->comm," [%d] %g \n",lu->myid,lu->id.RINFO(1));CHKERRQ(ierr); 276 ierr = PetscSynchronizedFlush(A->comm); 277 if (lu->myid == 0) ierr = PetscPrintf(PETSC_COMM_SELF, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr); 278 ierr = PetscSynchronizedPrintf(A->comm," [%d] %g \n",lu->myid,lu->id.RINFO(2));CHKERRQ(ierr); 279 ierr = PetscSynchronizedFlush(A->comm); 280 if (lu->myid == 0) ierr = PetscPrintf(PETSC_COMM_SELF, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr); 281 ierr = PetscSynchronizedPrintf(A->comm," [%d] %g \n",lu->myid,lu->id.RINFO(3));CHKERRQ(ierr); 282 ierr = PetscSynchronizedFlush(A->comm); 283 284 if (lu->myid == 0){ /* information from the host */ 285 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",lu->id.RINFOG(1));CHKERRQ(ierr); 286 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",lu->id.RINFOG(2));CHKERRQ(ierr); 287 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",lu->id.RINFOG(3));CHKERRQ(ierr); 288 289 ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(3));CHKERRQ(ierr); 290 ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(4));CHKERRQ(ierr); 291 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",lu->id.INFOG(5));CHKERRQ(ierr); 292 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",lu->id.INFOG(6));CHKERRQ(ierr); 293 ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively uese after analysis): %d \n",lu->id.INFOG(7));CHKERRQ(ierr); 294 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",lu->id.INFOG(8));CHKERRQ(ierr); 295 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real space store the matrix factors after analysis): %d \n",lu->id.INFOG(9));CHKERRQ(ierr); 296 ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after analysis): %d \n",lu->id.INFOG(10));CHKERRQ(ierr); 297 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix): %d \n",lu->id.INFOG(11));CHKERRQ(ierr); 298 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",lu->id.INFOG(12));CHKERRQ(ierr); 299 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",lu->id.INFOG(13));CHKERRQ(ierr); 300 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",lu->id.INFOG(14));CHKERRQ(ierr); 301 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",lu->id.INFOG(15));CHKERRQ(ierr); 302 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in million of bytes) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",lu->id.INFOG(16));CHKERRQ(ierr); 303 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",lu->id.INFOG(17));CHKERRQ(ierr); 304 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",lu->id.INFOG(18));CHKERRQ(ierr); 305 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",lu->id.INFOG(19));CHKERRQ(ierr); 306 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",lu->id.INFOG(20));CHKERRQ(ierr); 307 } 308 309 PetscFunctionReturn(0); 310 } 311 312 #undef __FUNCT__ 313 #define __FUNCT__ "MatView_AIJMUMPS" 314 int MatView_AIJMUMPS(Mat A,PetscViewer viewer) { 315 int ierr; 316 PetscTruth isascii; 317 PetscViewerFormat format; 318 Mat_MUMPS *mumps=(Mat_MUMPS*)(A->spptr); 319 320 PetscFunctionBegin; 321 ierr = (*mumps->MatView)(A,viewer);CHKERRQ(ierr); 322 323 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 324 if (isascii) { 325 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 326 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 327 ierr = MatFactorInfo_MUMPS(A,viewer);CHKERRQ(ierr); 328 } 329 } 330 PetscFunctionReturn(0); 331 } 332 333 #undef __FUNCT__ 334 #define __FUNCT__ "MatSolve_AIJMUMPS" 335 int MatSolve_AIJMUMPS(Mat A,Vec b,Vec x) { 336 Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr; 337 PetscScalar *array; 338 Vec x_seq; 339 IS iden; 340 VecScatter scat; 341 int ierr; 342 343 PetscFunctionBegin; 344 if (lu->size > 1){ 345 if (!lu->myid){ 346 ierr = VecCreateSeq(PETSC_COMM_SELF,A->N,&x_seq);CHKERRQ(ierr); 347 ierr = ISCreateStride(PETSC_COMM_SELF,A->N,0,1,&iden);CHKERRQ(ierr); 348 } else { 349 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&x_seq);CHKERRQ(ierr); 350 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&iden);CHKERRQ(ierr); 351 } 352 ierr = VecScatterCreate(b,iden,x_seq,iden,&scat);CHKERRQ(ierr); 353 ierr = ISDestroy(iden);CHKERRQ(ierr); 354 355 ierr = VecScatterBegin(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 356 ierr = VecScatterEnd(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 357 if (!lu->myid) {ierr = VecGetArray(x_seq,&array);CHKERRQ(ierr);} 358 } else { /* size == 1 */ 359 ierr = VecCopy(b,x);CHKERRQ(ierr); 360 ierr = VecGetArray(x,&array);CHKERRQ(ierr); 361 } 362 if (!lu->myid) { /* define rhs on the host */ 363 #if defined(PETSC_USE_COMPLEX) 364 lu->id.rhs = (mumps_double_complex*)array; 365 #else 366 lu->id.rhs = array; 367 #endif 368 } 369 370 /* solve phase */ 371 lu->id.job=3; 372 #if defined(PETSC_USE_COMPLEX) 373 zmumps_c(&lu->id); 374 #else 375 dmumps_c(&lu->id); 376 #endif 377 if (lu->id.INFOG(1) < 0) { 378 SETERRQ1(1,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",lu->id.INFOG(1)); 379 } 380 381 /* convert mumps solution x_seq to petsc mpi x */ 382 if (lu->size > 1) { 383 if (!lu->myid){ 384 ierr = VecRestoreArray(x_seq,&array);CHKERRQ(ierr); 385 } 386 ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 387 ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 388 ierr = VecScatterDestroy(scat);CHKERRQ(ierr); 389 ierr = VecDestroy(x_seq);CHKERRQ(ierr); 390 } else { 391 ierr = VecRestoreArray(x,&array);CHKERRQ(ierr); 392 } 393 394 PetscFunctionReturn(0); 395 } 396 397 /* 398 input: 399 F: numeric factor 400 output: 401 nneg: total number of negative pivots 402 nzero: 0 403 npos: (global dimension of F) - nneg 404 */ 405 406 #undef __FUNCT__ 407 #define __FUNCT__ "MatGetInertia_SBAIJMUMPS" 408 int MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos) 409 { 410 Mat_MUMPS *lu =(Mat_MUMPS*)F->spptr; 411 int ierr,neg,zero,pos,size; 412 413 PetscFunctionBegin; 414 ierr = MPI_Comm_size(F->comm,&size);CHKERRQ(ierr); 415 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 416 if (size > 1 && lu->id.ICNTL(13) != 1){ 417 SETERRQ1(1,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",lu->id.INFOG(13)); 418 } 419 if (nneg){ 420 if (!lu->myid){ 421 *nneg = lu->id.INFOG(12); 422 } 423 ierr = MPI_Bcast(nneg,1,MPI_INT,0,lu->comm_mumps);CHKERRQ(ierr); 424 } 425 if (nzero) *nzero = 0; 426 if (npos) *npos = F->M - (*nneg); 427 PetscFunctionReturn(0); 428 } 429 430 #undef __FUNCT__ 431 #define __FUNCT__ "MatFactorNumeric_MPIAIJMUMPS" 432 int MatFactorNumeric_AIJMUMPS(Mat A,Mat *F) { 433 Mat_MUMPS *lu =(Mat_MUMPS*)(*F)->spptr; 434 Mat_MUMPS *lua=(Mat_MUMPS*)(A)->spptr; 435 int rnz,nnz,ierr,nz,i,M=A->M,*ai,*aj,icntl; 436 PetscTruth valOnly,flg; 437 438 PetscFunctionBegin; 439 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ 440 (*F)->ops->solve = MatSolve_AIJMUMPS; 441 442 /* Initialize a MUMPS instance */ 443 ierr = MPI_Comm_rank(A->comm, &lu->myid); 444 ierr = MPI_Comm_size(A->comm,&lu->size);CHKERRQ(ierr); 445 lua->myid = lu->myid; lua->size = lu->size; 446 lu->id.job = JOB_INIT; 447 ierr = MPI_Comm_dup(A->comm,&(lu->comm_mumps));CHKERRQ(ierr); 448 lu->id.comm_fortran = lu->comm_mumps; 449 450 /* Set mumps options */ 451 ierr = PetscOptionsBegin(A->comm,A->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 452 lu->id.par=1; /* host participates factorizaton and solve */ 453 lu->id.sym=lu->sym; 454 if (lu->sym == 2){ 455 ierr = PetscOptionsInt("-mat_mumps_sym","SYM: (1,2)","None",lu->id.sym,&icntl,&flg);CHKERRQ(ierr); 456 if (flg && icntl == 1) lu->id.sym=icntl; /* matrix is spd */ 457 } 458 #if defined(PETSC_USE_COMPLEX) 459 zmumps_c(&lu->id); 460 #else 461 dmumps_c(&lu->id); 462 #endif 463 464 if (lu->size == 1){ 465 lu->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 466 } else { 467 lu->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 468 } 469 470 icntl=-1; 471 ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",lu->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr); 472 if (flg && icntl > 0) { 473 lu->id.ICNTL(4)=icntl; /* and use mumps default icntl(i), i=1,2,3 */ 474 } else { /* no output */ 475 lu->id.ICNTL(1) = 0; /* error message, default= 6 */ 476 lu->id.ICNTL(2) = -1; /* output stream for diagnostic printing, statistics, and warning. default=0 */ 477 lu->id.ICNTL(3) = -1; /* output stream for global information, default=6 */ 478 lu->id.ICNTL(4) = 0; /* level of printing, 0,1,2,3,4, default=2 */ 479 } 480 ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): matrix prescaling (0 to 7)","None",lu->id.ICNTL(6),&lu->id.ICNTL(6),PETSC_NULL);CHKERRQ(ierr); 481 icntl=-1; 482 ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): matrix ordering (0 to 7)","None",lu->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr); 483 if (flg) { 484 if (icntl== 1){ 485 SETERRQ(PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n"); 486 } else { 487 lu->id.ICNTL(7) = icntl; 488 } 489 } 490 ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): A or A^T x=b to be solved. 1: A; otherwise: A^T","None",lu->id.ICNTL(9),&lu->id.ICNTL(9),PETSC_NULL);CHKERRQ(ierr); 491 ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",lu->id.ICNTL(10),&lu->id.ICNTL(10),PETSC_NULL);CHKERRQ(ierr); 492 ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): error analysis, a positive value returns statistics (by -ksp_view)","None",lu->id.ICNTL(11),&lu->id.ICNTL(11),PETSC_NULL);CHKERRQ(ierr); 493 ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): efficiency control","None",lu->id.ICNTL(12),&lu->id.ICNTL(12),PETSC_NULL);CHKERRQ(ierr); 494 ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): efficiency control","None",lu->id.ICNTL(13),&lu->id.ICNTL(13),PETSC_NULL);CHKERRQ(ierr); 495 ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage of estimated workspace increase","None",lu->id.ICNTL(14),&lu->id.ICNTL(14),PETSC_NULL);CHKERRQ(ierr); 496 ierr = PetscOptionsInt("-mat_mumps_icntl_15","ICNTL(15): efficiency control","None",lu->id.ICNTL(15),&lu->id.ICNTL(15),PETSC_NULL);CHKERRQ(ierr); 497 498 /* 499 ierr = PetscOptionsInt("-mat_mumps_icntl_16","ICNTL(16): 1: rank detection; 2: rank detection and nullspace","None",lu->id.ICNTL(16),&icntl,&flg);CHKERRQ(ierr); 500 if (flg){ 501 if (icntl >-1 && icntl <3 ){ 502 if (lu->myid==0) lu->id.ICNTL(16) = icntl; 503 } else { 504 SETERRQ1(PETSC_ERR_SUP,"ICNTL(16)=%d -- not supported\n",icntl); 505 } 506 } 507 */ 508 509 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",lu->id.CNTL(1),&lu->id.CNTL(1),PETSC_NULL);CHKERRQ(ierr); 510 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",lu->id.CNTL(2),&lu->id.CNTL(2),PETSC_NULL);CHKERRQ(ierr); 511 ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",lu->id.CNTL(3),&lu->id.CNTL(3),PETSC_NULL);CHKERRQ(ierr); 512 PetscOptionsEnd(); 513 } 514 515 /* define matrix A */ 516 switch (lu->id.ICNTL(18)){ 517 case 0: /* centralized assembled matrix input (size=1) */ 518 if (!lu->myid) { 519 if (lua->isAIJ){ 520 Mat_SeqAIJ *aa = (Mat_SeqAIJ*)A->data; 521 nz = aa->nz; 522 ai = aa->i; aj = aa->j; lu->val = aa->a; 523 } else { 524 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)A->data; 525 nz = aa->nz; 526 ai = aa->i; aj = aa->j; lu->val = aa->a; 527 } 528 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization, get irn and jcn */ 529 ierr = PetscMalloc(nz*sizeof(int),&lu->irn);CHKERRQ(ierr); 530 ierr = PetscMalloc(nz*sizeof(int),&lu->jcn);CHKERRQ(ierr); 531 nz = 0; 532 for (i=0; i<M; i++){ 533 rnz = ai[i+1] - ai[i]; 534 while (rnz--) { /* Fortran row/col index! */ 535 lu->irn[nz] = i+1; lu->jcn[nz] = (*aj)+1; aj++; nz++; 536 } 537 } 538 } 539 } 540 break; 541 case 3: /* distributed assembled matrix input (size>1) */ 542 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ 543 valOnly = PETSC_FALSE; 544 } else { 545 valOnly = PETSC_TRUE; /* only update mat values, not row and col index */ 546 } 547 ierr = MatConvertToTriples(A,1,valOnly, &nnz, &lu->irn, &lu->jcn, &lu->val);CHKERRQ(ierr); 548 break; 549 default: SETERRQ(PETSC_ERR_SUP,"Matrix input format is not supported by MUMPS."); 550 } 551 552 /* analysis phase */ 553 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ 554 lu->id.n = M; 555 switch (lu->id.ICNTL(18)){ 556 case 0: /* centralized assembled matrix input */ 557 if (!lu->myid) { 558 lu->id.nz =nz; lu->id.irn=lu->irn; lu->id.jcn=lu->jcn; 559 if (lu->id.ICNTL(6)>1){ 560 #if defined(PETSC_USE_COMPLEX) 561 lu->id.a = (mumps_double_complex*)lu->val; 562 #else 563 lu->id.a = lu->val; 564 #endif 565 } 566 } 567 break; 568 case 3: /* distributed assembled matrix input (size>1) */ 569 lu->id.nz_loc = nnz; 570 lu->id.irn_loc=lu->irn; lu->id.jcn_loc=lu->jcn; 571 if (lu->id.ICNTL(6)>1) { 572 #if defined(PETSC_USE_COMPLEX) 573 lu->id.a_loc = (mumps_double_complex*)lu->val; 574 #else 575 lu->id.a_loc = lu->val; 576 #endif 577 } 578 break; 579 } 580 lu->id.job=1; 581 #if defined(PETSC_USE_COMPLEX) 582 zmumps_c(&lu->id); 583 #else 584 dmumps_c(&lu->id); 585 #endif 586 if (lu->id.INFOG(1) < 0) { 587 SETERRQ1(1,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",lu->id.INFOG(1)); 588 } 589 } 590 591 /* numerical factorization phase */ 592 if(lu->id.ICNTL(18) == 0) { 593 if (!lu->myid) { 594 #if defined(PETSC_USE_COMPLEX) 595 lu->id.a = (mumps_double_complex*)lu->val; 596 #else 597 lu->id.a = lu->val; 598 #endif 599 } 600 } else { 601 #if defined(PETSC_USE_COMPLEX) 602 lu->id.a_loc = (mumps_double_complex*)lu->val; 603 #else 604 lu->id.a_loc = lu->val; 605 #endif 606 } 607 lu->id.job=2; 608 #if defined(PETSC_USE_COMPLEX) 609 zmumps_c(&lu->id); 610 #else 611 dmumps_c(&lu->id); 612 #endif 613 if (lu->id.INFOG(1) < 0) { 614 SETERRQ2(1,"Error reported by MUMPS in numerical factorization phase: INFO(1)=%d, INFO(2)=%d\n",lu->id.INFO(1),lu->id.INFO(2)); 615 } 616 617 if (lu->myid==0 && lu->id.ICNTL(16) > 0){ 618 SETERRQ1(1," lu->id.ICNTL(16):=%d\n",lu->id.INFOG(16)); 619 } 620 621 (*F)->assembled = PETSC_TRUE; 622 lu->matstruc = SAME_NONZERO_PATTERN; 623 lu->CleanUpMUMPS = PETSC_TRUE; 624 PetscFunctionReturn(0); 625 } 626 627 /* Note the Petsc r and c permutations are ignored */ 628 #undef __FUNCT__ 629 #define __FUNCT__ "MatLUFactorSymbolic_AIJMUMPS" 630 int MatLUFactorSymbolic_AIJMUMPS(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) { 631 Mat B; 632 Mat_MUMPS *lu; 633 int ierr; 634 635 PetscFunctionBegin; 636 637 /* Create the factorization matrix */ 638 ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&B);CHKERRQ(ierr); 639 ierr = MatSetType(B,MATAIJMUMPS);CHKERRQ(ierr); 640 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 641 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 642 643 B->ops->lufactornumeric = MatFactorNumeric_AIJMUMPS; 644 B->factor = FACTOR_LU; 645 lu = (Mat_MUMPS*)B->spptr; 646 lu->sym = 0; 647 lu->matstruc = DIFFERENT_NONZERO_PATTERN; 648 649 *F = B; 650 PetscFunctionReturn(0); 651 } 652 653 /* Note the Petsc r permutation is ignored */ 654 #undef __FUNCT__ 655 #define __FUNCT__ "MatCholeskyFactorSymbolic_SBAIJMUMPS" 656 int MatCholeskyFactorSymbolic_SBAIJMUMPS(Mat A,IS r,MatFactorInfo *info,Mat *F) { 657 Mat B; 658 Mat_MUMPS *lu; 659 int ierr; 660 661 PetscFunctionBegin; 662 663 /* Create the factorization matrix */ 664 ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&B);CHKERRQ(ierr); 665 ierr = MatSetType(B,MATAIJMUMPS);CHKERRQ(ierr); 666 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 667 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 668 669 B->ops->choleskyfactornumeric = MatFactorNumeric_AIJMUMPS; 670 B->ops->getinertia = MatGetInertia_SBAIJMUMPS; 671 B->factor = FACTOR_CHOLESKY; 672 lu = (Mat_MUMPS*)B->spptr; 673 lu->sym = 2; 674 lu->matstruc = DIFFERENT_NONZERO_PATTERN; 675 676 *F = B; 677 PetscFunctionReturn(0); 678 } 679 680 #undef __FUNCT__ 681 #define __FUNCT__ "MatAssemblyEnd_AIJMUMPS" 682 int MatAssemblyEnd_AIJMUMPS(Mat A,MatAssemblyType mode) { 683 int ierr; 684 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 685 686 PetscFunctionBegin; 687 ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 688 689 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 690 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 691 A->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 692 PetscFunctionReturn(0); 693 } 694 695 EXTERN_C_BEGIN 696 #undef __FUNCT__ 697 #define __FUNCT__ "MatConvert_AIJ_AIJMUMPS" 698 int MatConvert_AIJ_AIJMUMPS(Mat A,const MatType newtype,Mat *newmat) { 699 int ierr,size; 700 MPI_Comm comm; 701 Mat B=*newmat; 702 Mat_MUMPS *mumps; 703 704 PetscFunctionBegin; 705 if (B != A) { 706 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 707 } 708 709 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 710 ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr); 711 712 mumps->MatDuplicate = A->ops->duplicate; 713 mumps->MatView = A->ops->view; 714 mumps->MatAssemblyEnd = A->ops->assemblyend; 715 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 716 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 717 mumps->MatDestroy = A->ops->destroy; 718 mumps->specialdestroy = MatDestroy_AIJMUMPS; 719 mumps->CleanUpMUMPS = PETSC_FALSE; 720 mumps->isAIJ = PETSC_TRUE; 721 722 B->spptr = (void *)mumps; 723 B->ops->duplicate = MatDuplicate_AIJMUMPS; 724 B->ops->view = MatView_AIJMUMPS; 725 B->ops->assemblyend = MatAssemblyEnd_AIJMUMPS; 726 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 727 B->ops->destroy = MatDestroy_MUMPS; 728 729 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 730 if (size == 1) { 731 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_aijmumps_C", 732 "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr); 733 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_seqaij_C", 734 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 735 } else { 736 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_aijmumps_C", 737 "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr); 738 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_mpiaij_C", 739 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 740 } 741 742 PetscLogInfo(0,"Using MUMPS for LU factorization and solves."); 743 ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr); 744 *newmat = B; 745 PetscFunctionReturn(0); 746 } 747 EXTERN_C_END 748 749 #undef __FUNCT__ 750 #define __FUNCT__ "MatDuplicate_AIJMUMPS" 751 int MatDuplicate_AIJMUMPS(Mat A, MatDuplicateOption op, Mat *M) { 752 int ierr; 753 Mat_MUMPS *lu=(Mat_MUMPS *)A->spptr; 754 755 PetscFunctionBegin; 756 ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr); 757 ierr = MatConvert_AIJ_AIJMUMPS(*M,MATAIJMUMPS,M);CHKERRQ(ierr); 758 ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_MUMPS));CHKERRQ(ierr); 759 PetscFunctionReturn(0); 760 } 761 762 /*MC 763 MATAIJMUMPS - MATAIJMUMPS = "aijmumps" - A matrix type providing direct solvers (LU) for distributed 764 and sequential matrices via the external package MUMPS. 765 766 If MUMPS is installed (see the manual for instructions 767 on how to declare the existence of external packages), 768 a matrix type can be constructed which invokes MUMPS solvers. 769 After calling MatCreate(...,A), simply call MatSetType(A,MATAIJMUMPS). 770 This matrix type is only supported for double precision real. 771 772 If created with a single process communicator, this matrix type inherits from MATSEQAIJ. 773 Otherwise, this matrix type inherits from MATMPIAIJ. Hence for single process communicators, 774 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 775 for communicators controlling multiple processes. It is recommended that you call both of 776 the above preallocation routines for simplicity. One can also call MatConvert for an inplace 777 conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size) 778 without data copy. 779 780 Options Database Keys: 781 + -mat_type aijmumps - sets the matrix type to "aijmumps" during a call to MatSetFromOptions() 782 . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric 783 . -mat_mumps_icntl_4 <0,1,2,3,4> - print level 784 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide) 785 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide) 786 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T 787 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements 788 . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view 789 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide) 790 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide) 791 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide) 792 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide) 793 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold 794 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement 795 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold 796 797 Level: beginner 798 799 .seealso: MATSBAIJMUMPS 800 M*/ 801 802 EXTERN_C_BEGIN 803 #undef __FUNCT__ 804 #define __FUNCT__ "MatCreate_AIJMUMPS" 805 int MatCreate_AIJMUMPS(Mat A) { 806 int ierr,size; 807 Mat A_diag; 808 MPI_Comm comm; 809 810 PetscFunctionBegin; 811 /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */ 812 /* and AIJMUMPS types */ 813 ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJMUMPS);CHKERRQ(ierr); 814 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 815 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 816 if (size == 1) { 817 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 818 } else { 819 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 820 A_diag = ((Mat_MPIAIJ *)A->data)->A; 821 ierr = MatConvert_AIJ_AIJMUMPS(A_diag,MATAIJMUMPS,&A_diag);CHKERRQ(ierr); 822 } 823 ierr = MatConvert_AIJ_AIJMUMPS(A,MATAIJMUMPS,&A);CHKERRQ(ierr); 824 PetscFunctionReturn(0); 825 } 826 EXTERN_C_END 827 828 #undef __FUNCT__ 829 #define __FUNCT__ "MatAssemblyEnd_SBAIJMUMPS" 830 int MatAssemblyEnd_SBAIJMUMPS(Mat A,MatAssemblyType mode) { 831 int ierr; 832 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 833 834 PetscFunctionBegin; 835 ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 836 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 837 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 838 A->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SBAIJMUMPS; 839 PetscFunctionReturn(0); 840 } 841 842 EXTERN_C_BEGIN 843 #undef __FUNCT__ 844 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS" 845 int MatMPISBAIJSetPreallocation_MPISBAIJMUMPS(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 846 { 847 Mat A; 848 Mat_MUMPS *mumps=(Mat_MUMPS*)B->spptr; 849 int ierr; 850 851 PetscFunctionBegin; 852 /* 853 After performing the MPISBAIJ Preallocation, we need to convert the local diagonal block matrix 854 into MUMPS type so that the block jacobi preconditioner (for example) can use MUMPS. I would 855 like this to be done in the MatCreate routine, but the creation of this inner matrix requires 856 block size info so that PETSc can determine the local size properly. The block size info is set 857 in the preallocation routine. 858 */ 859 ierr = (*mumps->MatPreallocate)(B,bs,d_nz,d_nnz,o_nz,o_nnz); 860 A = ((Mat_MPISBAIJ *)B->data)->A; 861 ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,&A);CHKERRQ(ierr); 862 PetscFunctionReturn(0); 863 } 864 EXTERN_C_END 865 866 EXTERN_C_BEGIN 867 #undef __FUNCT__ 868 #define __FUNCT__ "MatConvert_SBAIJ_SBAIJMUMPS" 869 int MatConvert_SBAIJ_SBAIJMUMPS(Mat A,const MatType newtype,Mat *newmat) { 870 int ierr,size; 871 MPI_Comm comm; 872 Mat B=*newmat; 873 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 874 void (*f)(void); 875 876 PetscFunctionBegin; 877 if (B != A) { 878 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 879 } 880 881 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 882 ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr); 883 884 mumps->MatDuplicate = A->ops->duplicate; 885 mumps->MatView = A->ops->view; 886 mumps->MatAssemblyEnd = A->ops->assemblyend; 887 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 888 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 889 mumps->MatDestroy = A->ops->destroy; 890 mumps->specialdestroy = MatDestroy_SBAIJMUMPS; 891 mumps->CleanUpMUMPS = PETSC_FALSE; 892 mumps->isAIJ = PETSC_FALSE; 893 894 B->spptr = (void *)mumps; 895 B->ops->duplicate = MatDuplicate_SBAIJMUMPS; 896 B->ops->view = MatView_AIJMUMPS; 897 B->ops->assemblyend = MatAssemblyEnd_SBAIJMUMPS; 898 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SBAIJMUMPS; 899 B->ops->destroy = MatDestroy_MUMPS; 900 901 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 902 if (size == 1) { 903 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C", 904 "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr); 905 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C", 906 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 907 } else { 908 /* I really don't like needing to know the tag: MatMPISBAIJSetPreallocation_C */ 909 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",&f);CHKERRQ(ierr); 910 if (f) { 911 mumps->MatPreallocate = (int (*)(Mat,int,int,int*,int,int*))f; 912 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 913 "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS", 914 MatMPISBAIJSetPreallocation_MPISBAIJMUMPS);CHKERRQ(ierr); 915 } 916 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C", 917 "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr); 918 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C", 919 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 920 } 921 922 PetscLogInfo(0,"Using MUMPS for Cholesky factorization and solves."); 923 ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr); 924 *newmat = B; 925 PetscFunctionReturn(0); 926 } 927 EXTERN_C_END 928 929 #undef __FUNCT__ 930 #define __FUNCT__ "MatDuplicate_SBAIJMUMPS" 931 int MatDuplicate_SBAIJMUMPS(Mat A, MatDuplicateOption op, Mat *M) { 932 int ierr; 933 Mat_MUMPS *lu=(Mat_MUMPS *)A->spptr; 934 935 PetscFunctionBegin; 936 ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr); 937 ierr = MatConvert_SBAIJ_SBAIJMUMPS(*M,MATSBAIJMUMPS,M);CHKERRQ(ierr); 938 ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_MUMPS));CHKERRQ(ierr); 939 PetscFunctionReturn(0); 940 } 941 942 /*MC 943 MATSBAIJMUMPS - MATSBAIJMUMPS = "sbaijmumps" - A symmetric matrix type providing direct solvers (Cholesky) for 944 distributed and sequential matrices via the external package MUMPS. 945 946 If MUMPS is installed (see the manual for instructions 947 on how to declare the existence of external packages), 948 a matrix type can be constructed which invokes MUMPS solvers. 949 After calling MatCreate(...,A), simply call MatSetType(A,MATSBAIJMUMPS). 950 This matrix type is only supported for double precision real. 951 952 If created with a single process communicator, this matrix type inherits from MATSEQSBAIJ. 953 Otherwise, this matrix type inherits from MATMPISBAIJ. Hence for single process communicators, 954 MatSeqSBAIJSetPreallocation is supported, and similarly MatMPISBAIJSetPreallocation is supported 955 for communicators controlling multiple processes. It is recommended that you call both of 956 the above preallocation routines for simplicity. One can also call MatConvert for an inplace 957 conversion to or from the MATSEQSBAIJ or MATMPISBAIJ type (depending on the communicator size) 958 without data copy. 959 960 Options Database Keys: 961 + -mat_type sbaijmumps - sets the matrix type to "sbaijmumps" during a call to MatSetFromOptions() 962 . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric 963 . -mat_mumps_icntl_4 <0,...,4> - print level 964 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide) 965 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide) 966 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T 967 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements 968 . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view 969 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide) 970 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide) 971 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide) 972 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide) 973 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold 974 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement 975 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold 976 977 Level: beginner 978 979 .seealso: MATAIJMUMPS 980 M*/ 981 982 EXTERN_C_BEGIN 983 #undef __FUNCT__ 984 #define __FUNCT__ "MatCreate_SBAIJMUMPS" 985 int MatCreate_SBAIJMUMPS(Mat A) { 986 int ierr,size; 987 988 PetscFunctionBegin; 989 /* Change type name before calling MatSetType to force proper construction of SeqSBAIJ or MPISBAIJ */ 990 /* and SBAIJMUMPS types */ 991 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJMUMPS);CHKERRQ(ierr); 992 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 993 if (size == 1) { 994 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 995 } else { 996 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 997 } 998 ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,&A);CHKERRQ(ierr); 999 PetscFunctionReturn(0); 1000 } 1001 EXTERN_C_END 1002