1 #define PETSCMAT_DLL 2 3 /* 4 Provides an interface to the MUMPS_4.3.1 sparse solver 5 */ 6 #include "src/mat/impls/aij/seq/aij.h" 7 #include "src/mat/impls/aij/mpi/mpiaij.h" 8 #include "src/mat/impls/sbaij/seq/sbaij.h" 9 #include "src/mat/impls/sbaij/mpi/mpisbaij.h" 10 11 EXTERN_C_BEGIN 12 #if defined(PETSC_USE_COMPLEX) 13 #include "zmumps_c.h" 14 #else 15 #include "dmumps_c.h" 16 #endif 17 EXTERN_C_END 18 #define JOB_INIT -1 19 #define JOB_END -2 20 /* macros s.t. indices match MUMPS documentation */ 21 #define ICNTL(I) icntl[(I)-1] 22 #define CNTL(I) cntl[(I)-1] 23 #define INFOG(I) infog[(I)-1] 24 #define INFO(I) info[(I)-1] 25 #define RINFOG(I) rinfog[(I)-1] 26 #define RINFO(I) rinfo[(I)-1] 27 28 typedef struct { 29 #if defined(PETSC_USE_COMPLEX) 30 ZMUMPS_STRUC_C id; 31 #else 32 DMUMPS_STRUC_C id; 33 #endif 34 MatStructure matstruc; 35 int myid,size,*irn,*jcn,sym; 36 PetscScalar *val; 37 MPI_Comm comm_mumps; 38 39 PetscTruth isAIJ,CleanUpMUMPS; 40 PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*); 41 PetscErrorCode (*MatView)(Mat,PetscViewer); 42 PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType); 43 PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*); 44 PetscErrorCode (*MatCholeskyFactorSymbolic)(Mat,IS,MatFactorInfo*,Mat*); 45 PetscErrorCode (*MatDestroy)(Mat); 46 PetscErrorCode (*specialdestroy)(Mat); 47 PetscErrorCode (*MatPreallocate)(Mat,int,int,int*,int,int*); 48 } Mat_MUMPS; 49 50 EXTERN PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*); 51 EXTERN_C_BEGIN 52 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SBAIJ_SBAIJMUMPS(Mat,const MatType,MatReuse,Mat*); 53 EXTERN_C_END 54 /* convert Petsc mpiaij matrix to triples: row[nz], col[nz], val[nz] */ 55 /* 56 input: 57 A - matrix in mpiaij or mpisbaij (bs=1) format 58 shift - 0: C style output triple; 1: Fortran style output triple. 59 valOnly - FALSE: spaces are allocated and values are set for the triple 60 TRUE: only the values in v array are updated 61 output: 62 nnz - dim of r, c, and v (number of local nonzero entries of A) 63 r, c, v - row and col index, matrix values (matrix triples) 64 */ 65 PetscErrorCode MatConvertToTriples(Mat A,int shift,PetscTruth valOnly,int *nnz,int **r, int **c, PetscScalar **v) { 66 int *ai, *aj, *bi, *bj, rstart,nz, *garray; 67 PetscErrorCode ierr; 68 int i,j,jj,jB,irow,m=A->m,*ajj,*bjj,countA,countB,colA_start,jcol; 69 int *row,*col; 70 PetscScalar *av, *bv,*val; 71 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 72 73 PetscFunctionBegin; 74 if (mumps->isAIJ){ 75 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 76 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)(mat->A)->data; 77 Mat_SeqAIJ *bb=(Mat_SeqAIJ*)(mat->B)->data; 78 nz = aa->nz + bb->nz; 79 ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= mat->rstart; 80 garray = mat->garray; 81 av=aa->a; bv=bb->a; 82 83 } else { 84 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data; 85 Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)(mat->A)->data; 86 Mat_SeqBAIJ *bb=(Mat_SeqBAIJ*)(mat->B)->data; 87 if (A->bs > 1) SETERRQ1(PETSC_ERR_SUP," bs=%d is not supported yet\n", A->bs); 88 nz = aa->nz + bb->nz; 89 ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= mat->rstart; 90 garray = mat->garray; 91 av=aa->a; bv=bb->a; 92 } 93 94 if (!valOnly){ 95 ierr = PetscMalloc(nz*sizeof(int),&row);CHKERRQ(ierr); 96 ierr = PetscMalloc(nz*sizeof(int),&col);CHKERRQ(ierr); 97 ierr = PetscMalloc(nz*sizeof(PetscScalar),&val);CHKERRQ(ierr); 98 *r = row; *c = col; *v = val; 99 } else { 100 row = *r; col = *c; val = *v; 101 } 102 *nnz = nz; 103 104 jj = 0; irow = rstart; 105 for ( i=0; i<m; i++ ) { 106 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 107 countA = ai[i+1] - ai[i]; 108 countB = bi[i+1] - bi[i]; 109 bjj = bj + bi[i]; 110 111 /* get jB, the starting local col index for the 2nd B-part */ 112 colA_start = rstart + ajj[0]; /* the smallest col index for A */ 113 j=-1; 114 do { 115 j++; 116 if (j == countB) break; 117 jcol = garray[bjj[j]]; 118 } while (jcol < colA_start); 119 jB = j; 120 121 /* B-part, smaller col index */ 122 colA_start = rstart + ajj[0]; /* the smallest col index for A */ 123 for (j=0; j<jB; j++){ 124 jcol = garray[bjj[j]]; 125 if (!valOnly){ 126 row[jj] = irow + shift; col[jj] = jcol + shift; 127 128 } 129 val[jj++] = *bv++; 130 } 131 /* A-part */ 132 for (j=0; j<countA; j++){ 133 if (!valOnly){ 134 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 135 } 136 val[jj++] = *av++; 137 } 138 /* B-part, larger col index */ 139 for (j=jB; j<countB; j++){ 140 if (!valOnly){ 141 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 142 } 143 val[jj++] = *bv++; 144 } 145 irow++; 146 } 147 148 PetscFunctionReturn(0); 149 } 150 151 EXTERN_C_BEGIN 152 #undef __FUNCT__ 153 #define __FUNCT__ "MatConvert_MUMPS_Base" 154 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MUMPS_Base(Mat A,const MatType type,MatReuse reuse,Mat *newmat) \ 155 { 156 PetscErrorCode ierr; 157 Mat B=*newmat; 158 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 159 void (*f)(void); 160 161 PetscFunctionBegin; 162 if (reuse == MAT_INITIAL_MATRIX) { 163 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 164 } 165 B->ops->duplicate = mumps->MatDuplicate; 166 B->ops->view = mumps->MatView; 167 B->ops->assemblyend = mumps->MatAssemblyEnd; 168 B->ops->lufactorsymbolic = mumps->MatLUFactorSymbolic; 169 B->ops->choleskyfactorsymbolic = mumps->MatCholeskyFactorSymbolic; 170 B->ops->destroy = mumps->MatDestroy; 171 172 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",&f);CHKERRQ(ierr); 173 if (f) { 174 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C","",(FCNVOID)mumps->MatPreallocate);CHKERRQ(ierr); 175 } 176 ierr = PetscFree(mumps);CHKERRQ(ierr); 177 178 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_aijmumps_C","",PETSC_NULL);CHKERRQ(ierr); 179 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 180 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_aijmumps_C","",PETSC_NULL);CHKERRQ(ierr); 181 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr); 182 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C","",PETSC_NULL);CHKERRQ(ierr); 183 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 184 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C","",PETSC_NULL);CHKERRQ(ierr); 185 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 186 187 ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr); 188 *newmat = B; 189 PetscFunctionReturn(0); 190 } 191 EXTERN_C_END 192 193 #undef __FUNCT__ 194 #define __FUNCT__ "MatDestroy_MUMPS" 195 PetscErrorCode MatDestroy_MUMPS(Mat A) 196 { 197 Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr; 198 PetscErrorCode ierr; 199 int size=lu->size; 200 PetscErrorCode (*specialdestroy)(Mat); 201 PetscFunctionBegin; 202 if (lu->CleanUpMUMPS) { 203 /* Terminate instance, deallocate memories */ 204 lu->id.job=JOB_END; 205 #if defined(PETSC_USE_COMPLEX) 206 zmumps_c(&lu->id); 207 #else 208 dmumps_c(&lu->id); 209 #endif 210 if (lu->irn) { 211 ierr = PetscFree(lu->irn);CHKERRQ(ierr); 212 } 213 if (lu->jcn) { 214 ierr = PetscFree(lu->jcn);CHKERRQ(ierr); 215 } 216 if (size>1 && lu->val) { 217 ierr = PetscFree(lu->val);CHKERRQ(ierr); 218 } 219 ierr = MPI_Comm_free(&(lu->comm_mumps));CHKERRQ(ierr); 220 } 221 specialdestroy = lu->specialdestroy; 222 ierr = (*specialdestroy)(A);CHKERRQ(ierr); 223 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 224 PetscFunctionReturn(0); 225 } 226 227 #undef __FUNCT__ 228 #define __FUNCT__ "MatDestroy_AIJMUMPS" 229 PetscErrorCode MatDestroy_AIJMUMPS(Mat A) 230 { 231 PetscErrorCode ierr; 232 int size; 233 234 PetscFunctionBegin; 235 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 236 if (size==1) { 237 ierr = MatConvert_MUMPS_Base(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 238 } else { 239 ierr = MatConvert_MUMPS_Base(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 240 } 241 PetscFunctionReturn(0); 242 } 243 244 #undef __FUNCT__ 245 #define __FUNCT__ "MatDestroy_SBAIJMUMPS" 246 PetscErrorCode MatDestroy_SBAIJMUMPS(Mat A) 247 { 248 PetscErrorCode ierr; 249 int size; 250 251 PetscFunctionBegin; 252 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 253 if (size==1) { 254 ierr = MatConvert_MUMPS_Base(A,MATSEQSBAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 255 } else { 256 ierr = MatConvert_MUMPS_Base(A,MATMPISBAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 257 } 258 PetscFunctionReturn(0); 259 } 260 261 #undef __FUNCT__ 262 #define __FUNCT__ "MatFactorInfo_MUMPS" 263 PetscErrorCode MatFactorInfo_MUMPS(Mat A,PetscViewer viewer) { 264 Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr; 265 PetscErrorCode ierr; 266 267 PetscFunctionBegin; 268 ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr); 269 ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",lu->id.sym);CHKERRQ(ierr); 270 ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",lu->id.par);CHKERRQ(ierr); 271 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",lu->id.ICNTL(4));CHKERRQ(ierr); 272 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",lu->id.ICNTL(5));CHKERRQ(ierr); 273 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",lu->id.ICNTL(6));CHKERRQ(ierr); 274 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (matrix ordering): %d \n",lu->id.ICNTL(7));CHKERRQ(ierr); 275 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(9) (A/A^T x=b is solved): %d \n",lu->id.ICNTL(9));CHKERRQ(ierr); 276 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",lu->id.ICNTL(10));CHKERRQ(ierr); 277 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",lu->id.ICNTL(11));CHKERRQ(ierr); 278 if (!lu->myid && lu->id.ICNTL(11)>0) { 279 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(4) (inf norm of input mat): %g\n",lu->id.RINFOG(4));CHKERRQ(ierr); 280 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(5) (inf norm of solution): %g\n",lu->id.RINFOG(5));CHKERRQ(ierr); 281 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(6) (inf norm of residual): %g\n",lu->id.RINFOG(6));CHKERRQ(ierr); 282 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",lu->id.RINFOG(7),lu->id.RINFOG(8));CHKERRQ(ierr); 283 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(9) (error estimate): %g \n",lu->id.RINFOG(9));CHKERRQ(ierr); 284 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",lu->id.RINFOG(10),lu->id.RINFOG(11));CHKERRQ(ierr); 285 286 } 287 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",lu->id.ICNTL(12));CHKERRQ(ierr); 288 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (efficiency control): %d \n",lu->id.ICNTL(13));CHKERRQ(ierr); 289 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",lu->id.ICNTL(14));CHKERRQ(ierr); 290 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(15) (efficiency control): %d \n",lu->id.ICNTL(15));CHKERRQ(ierr); 291 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",lu->id.ICNTL(18));CHKERRQ(ierr); 292 293 ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",lu->id.CNTL(1));CHKERRQ(ierr); 294 ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",lu->id.CNTL(2));CHKERRQ(ierr); 295 ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",lu->id.CNTL(3));CHKERRQ(ierr); 296 297 /* infomation local to each processor */ 298 if (!lu->myid) ierr = PetscPrintf(PETSC_COMM_SELF, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr); 299 ierr = PetscSynchronizedPrintf(A->comm," [%d] %g \n",lu->myid,lu->id.RINFO(1));CHKERRQ(ierr); 300 ierr = PetscSynchronizedFlush(A->comm); 301 if (!lu->myid) ierr = PetscPrintf(PETSC_COMM_SELF, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr); 302 ierr = PetscSynchronizedPrintf(A->comm," [%d] %g \n",lu->myid,lu->id.RINFO(2));CHKERRQ(ierr); 303 ierr = PetscSynchronizedFlush(A->comm); 304 if (!lu->myid) ierr = PetscPrintf(PETSC_COMM_SELF, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr); 305 ierr = PetscSynchronizedPrintf(A->comm," [%d] %g \n",lu->myid,lu->id.RINFO(3));CHKERRQ(ierr); 306 ierr = PetscSynchronizedFlush(A->comm); 307 308 if (!lu->myid){ /* information from the host */ 309 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",lu->id.RINFOG(1));CHKERRQ(ierr); 310 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",lu->id.RINFOG(2));CHKERRQ(ierr); 311 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",lu->id.RINFOG(3));CHKERRQ(ierr); 312 313 ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(3));CHKERRQ(ierr); 314 ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(4));CHKERRQ(ierr); 315 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",lu->id.INFOG(5));CHKERRQ(ierr); 316 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",lu->id.INFOG(6));CHKERRQ(ierr); 317 ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively uese after analysis): %d \n",lu->id.INFOG(7));CHKERRQ(ierr); 318 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",lu->id.INFOG(8));CHKERRQ(ierr); 319 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real space store the matrix factors after analysis): %d \n",lu->id.INFOG(9));CHKERRQ(ierr); 320 ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after analysis): %d \n",lu->id.INFOG(10));CHKERRQ(ierr); 321 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix): %d \n",lu->id.INFOG(11));CHKERRQ(ierr); 322 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",lu->id.INFOG(12));CHKERRQ(ierr); 323 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",lu->id.INFOG(13));CHKERRQ(ierr); 324 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",lu->id.INFOG(14));CHKERRQ(ierr); 325 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",lu->id.INFOG(15));CHKERRQ(ierr); 326 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in million of bytes) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",lu->id.INFOG(16));CHKERRQ(ierr); 327 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",lu->id.INFOG(17));CHKERRQ(ierr); 328 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",lu->id.INFOG(18));CHKERRQ(ierr); 329 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",lu->id.INFOG(19));CHKERRQ(ierr); 330 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",lu->id.INFOG(20));CHKERRQ(ierr); 331 } 332 333 PetscFunctionReturn(0); 334 } 335 336 #undef __FUNCT__ 337 #define __FUNCT__ "MatView_AIJMUMPS" 338 PetscErrorCode MatView_AIJMUMPS(Mat A,PetscViewer viewer) { 339 PetscErrorCode ierr; 340 PetscTruth iascii; 341 PetscViewerFormat format; 342 Mat_MUMPS *mumps=(Mat_MUMPS*)(A->spptr); 343 344 PetscFunctionBegin; 345 ierr = (*mumps->MatView)(A,viewer);CHKERRQ(ierr); 346 347 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 348 if (iascii) { 349 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 350 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 351 ierr = MatFactorInfo_MUMPS(A,viewer);CHKERRQ(ierr); 352 } 353 } 354 PetscFunctionReturn(0); 355 } 356 357 #undef __FUNCT__ 358 #define __FUNCT__ "MatSolve_AIJMUMPS" 359 PetscErrorCode MatSolve_AIJMUMPS(Mat A,Vec b,Vec x) { 360 Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr; 361 PetscScalar *array; 362 Vec x_seq; 363 IS iden; 364 VecScatter scat; 365 PetscErrorCode ierr; 366 367 PetscFunctionBegin; 368 if (lu->size > 1){ 369 if (!lu->myid){ 370 ierr = VecCreateSeq(PETSC_COMM_SELF,A->N,&x_seq);CHKERRQ(ierr); 371 ierr = ISCreateStride(PETSC_COMM_SELF,A->N,0,1,&iden);CHKERRQ(ierr); 372 } else { 373 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&x_seq);CHKERRQ(ierr); 374 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&iden);CHKERRQ(ierr); 375 } 376 ierr = VecScatterCreate(b,iden,x_seq,iden,&scat);CHKERRQ(ierr); 377 ierr = ISDestroy(iden);CHKERRQ(ierr); 378 379 ierr = VecScatterBegin(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 380 ierr = VecScatterEnd(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 381 if (!lu->myid) {ierr = VecGetArray(x_seq,&array);CHKERRQ(ierr);} 382 } else { /* size == 1 */ 383 ierr = VecCopy(b,x);CHKERRQ(ierr); 384 ierr = VecGetArray(x,&array);CHKERRQ(ierr); 385 } 386 if (!lu->myid) { /* define rhs on the host */ 387 #if defined(PETSC_USE_COMPLEX) 388 lu->id.rhs = (mumps_double_complex*)array; 389 #else 390 lu->id.rhs = array; 391 #endif 392 } 393 394 /* solve phase */ 395 lu->id.job=3; 396 #if defined(PETSC_USE_COMPLEX) 397 zmumps_c(&lu->id); 398 #else 399 dmumps_c(&lu->id); 400 #endif 401 if (lu->id.INFOG(1) < 0) { 402 SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",lu->id.INFOG(1)); 403 } 404 405 /* convert mumps solution x_seq to petsc mpi x */ 406 if (lu->size > 1) { 407 if (!lu->myid){ 408 ierr = VecRestoreArray(x_seq,&array);CHKERRQ(ierr); 409 } 410 ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 411 ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 412 ierr = VecScatterDestroy(scat);CHKERRQ(ierr); 413 ierr = VecDestroy(x_seq);CHKERRQ(ierr); 414 } else { 415 ierr = VecRestoreArray(x,&array);CHKERRQ(ierr); 416 } 417 418 PetscFunctionReturn(0); 419 } 420 421 /* 422 input: 423 F: numeric factor 424 output: 425 nneg: total number of negative pivots 426 nzero: 0 427 npos: (global dimension of F) - nneg 428 */ 429 430 #undef __FUNCT__ 431 #define __FUNCT__ "MatGetInertia_SBAIJMUMPS" 432 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos) 433 { 434 Mat_MUMPS *lu =(Mat_MUMPS*)F->spptr; 435 PetscErrorCode ierr; 436 int size; 437 438 PetscFunctionBegin; 439 ierr = MPI_Comm_size(F->comm,&size);CHKERRQ(ierr); 440 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 441 if (size > 1 && lu->id.ICNTL(13) != 1){ 442 SETERRQ1(PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",lu->id.INFOG(13)); 443 } 444 if (nneg){ 445 if (!lu->myid){ 446 *nneg = lu->id.INFOG(12); 447 } 448 ierr = MPI_Bcast(nneg,1,MPI_INT,0,lu->comm_mumps);CHKERRQ(ierr); 449 } 450 if (nzero) *nzero = 0; 451 if (npos) *npos = F->M - (*nneg); 452 PetscFunctionReturn(0); 453 } 454 455 #undef __FUNCT__ 456 #define __FUNCT__ "MatFactorNumeric_AIJMUMPS" 457 PetscErrorCode MatFactorNumeric_AIJMUMPS(Mat A,MatFactorInfo *info,Mat *F) 458 { 459 Mat_MUMPS *lu =(Mat_MUMPS*)(*F)->spptr; 460 Mat_MUMPS *lua=(Mat_MUMPS*)(A)->spptr; 461 PetscErrorCode ierr; 462 PetscInt rnz,nnz,nz,i,M=A->M,*ai,*aj,icntl; 463 PetscTruth valOnly,flg; 464 465 PetscFunctionBegin; 466 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ 467 (*F)->ops->solve = MatSolve_AIJMUMPS; 468 469 /* Initialize a MUMPS instance */ 470 ierr = MPI_Comm_rank(A->comm, &lu->myid); 471 ierr = MPI_Comm_size(A->comm,&lu->size);CHKERRQ(ierr); 472 lua->myid = lu->myid; lua->size = lu->size; 473 lu->id.job = JOB_INIT; 474 ierr = MPI_Comm_dup(A->comm,&(lu->comm_mumps));CHKERRQ(ierr); 475 lu->id.comm_fortran = lu->comm_mumps; 476 477 /* Set mumps options */ 478 ierr = PetscOptionsBegin(A->comm,A->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 479 lu->id.par=1; /* host participates factorizaton and solve */ 480 lu->id.sym=lu->sym; 481 if (lu->sym == 2){ 482 ierr = PetscOptionsInt("-mat_mumps_sym","SYM: (1,2)","None",lu->id.sym,&icntl,&flg);CHKERRQ(ierr); 483 if (flg && icntl == 1) lu->id.sym=icntl; /* matrix is spd */ 484 } 485 #if defined(PETSC_USE_COMPLEX) 486 zmumps_c(&lu->id); 487 #else 488 dmumps_c(&lu->id); 489 #endif 490 491 if (lu->size == 1){ 492 lu->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 493 } else { 494 lu->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 495 } 496 497 icntl=-1; 498 ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",lu->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr); 499 if ((flg && icntl > 0) || PetscLogPrintInfo) { 500 lu->id.ICNTL(4)=icntl; /* and use mumps default icntl(i), i=1,2,3 */ 501 } else { /* no output */ 502 lu->id.ICNTL(1) = 0; /* error message, default= 6 */ 503 lu->id.ICNTL(2) = -1; /* output stream for diagnostic printing, statistics, and warning. default=0 */ 504 lu->id.ICNTL(3) = -1; /* output stream for global information, default=6 */ 505 lu->id.ICNTL(4) = 0; /* level of printing, 0,1,2,3,4, default=2 */ 506 } 507 ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): matrix prescaling (0 to 7)","None",lu->id.ICNTL(6),&lu->id.ICNTL(6),PETSC_NULL);CHKERRQ(ierr); 508 icntl=-1; 509 ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): matrix ordering (0 to 7)","None",lu->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr); 510 if (flg) { 511 if (icntl== 1){ 512 SETERRQ(PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n"); 513 } else { 514 lu->id.ICNTL(7) = icntl; 515 } 516 } 517 ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): A or A^T x=b to be solved. 1: A; otherwise: A^T","None",lu->id.ICNTL(9),&lu->id.ICNTL(9),PETSC_NULL);CHKERRQ(ierr); 518 ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",lu->id.ICNTL(10),&lu->id.ICNTL(10),PETSC_NULL);CHKERRQ(ierr); 519 ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): error analysis, a positive value returns statistics (by -ksp_view)","None",lu->id.ICNTL(11),&lu->id.ICNTL(11),PETSC_NULL);CHKERRQ(ierr); 520 ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): efficiency control","None",lu->id.ICNTL(12),&lu->id.ICNTL(12),PETSC_NULL);CHKERRQ(ierr); 521 ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): efficiency control","None",lu->id.ICNTL(13),&lu->id.ICNTL(13),PETSC_NULL);CHKERRQ(ierr); 522 ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage of estimated workspace increase","None",lu->id.ICNTL(14),&lu->id.ICNTL(14),PETSC_NULL);CHKERRQ(ierr); 523 ierr = PetscOptionsInt("-mat_mumps_icntl_15","ICNTL(15): efficiency control","None",lu->id.ICNTL(15),&lu->id.ICNTL(15),PETSC_NULL);CHKERRQ(ierr); 524 525 /* 526 ierr = PetscOptionsInt("-mat_mumps_icntl_16","ICNTL(16): 1: rank detection; 2: rank detection and nullspace","None",lu->id.ICNTL(16),&icntl,&flg);CHKERRQ(ierr); 527 if (flg){ 528 if (icntl >-1 && icntl <3 ){ 529 if (lu->myid==0) lu->id.ICNTL(16) = icntl; 530 } else { 531 SETERRQ1(PETSC_ERR_SUP,"ICNTL(16)=%d -- not supported\n",icntl); 532 } 533 } 534 */ 535 536 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",lu->id.CNTL(1),&lu->id.CNTL(1),PETSC_NULL);CHKERRQ(ierr); 537 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",lu->id.CNTL(2),&lu->id.CNTL(2),PETSC_NULL);CHKERRQ(ierr); 538 ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",lu->id.CNTL(3),&lu->id.CNTL(3),PETSC_NULL);CHKERRQ(ierr); 539 PetscOptionsEnd(); 540 } 541 542 /* define matrix A */ 543 switch (lu->id.ICNTL(18)){ 544 case 0: /* centralized assembled matrix input (size=1) */ 545 if (!lu->myid) { 546 if (lua->isAIJ){ 547 Mat_SeqAIJ *aa = (Mat_SeqAIJ*)A->data; 548 nz = aa->nz; 549 ai = aa->i; aj = aa->j; lu->val = aa->a; 550 } else { 551 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)A->data; 552 nz = aa->nz; 553 ai = aa->i; aj = aa->j; lu->val = aa->a; 554 } 555 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization, get irn and jcn */ 556 ierr = PetscMalloc(nz*sizeof(int),&lu->irn);CHKERRQ(ierr); 557 ierr = PetscMalloc(nz*sizeof(int),&lu->jcn);CHKERRQ(ierr); 558 nz = 0; 559 for (i=0; i<M; i++){ 560 rnz = ai[i+1] - ai[i]; 561 while (rnz--) { /* Fortran row/col index! */ 562 lu->irn[nz] = i+1; lu->jcn[nz] = (*aj)+1; aj++; nz++; 563 } 564 } 565 } 566 } 567 break; 568 case 3: /* distributed assembled matrix input (size>1) */ 569 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ 570 valOnly = PETSC_FALSE; 571 } else { 572 valOnly = PETSC_TRUE; /* only update mat values, not row and col index */ 573 } 574 ierr = MatConvertToTriples(A,1,valOnly, &nnz, &lu->irn, &lu->jcn, &lu->val);CHKERRQ(ierr); 575 break; 576 default: SETERRQ(PETSC_ERR_SUP,"Matrix input format is not supported by MUMPS."); 577 } 578 579 /* analysis phase */ 580 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ 581 lu->id.n = M; 582 switch (lu->id.ICNTL(18)){ 583 case 0: /* centralized assembled matrix input */ 584 if (!lu->myid) { 585 lu->id.nz =nz; lu->id.irn=lu->irn; lu->id.jcn=lu->jcn; 586 if (lu->id.ICNTL(6)>1){ 587 #if defined(PETSC_USE_COMPLEX) 588 lu->id.a = (mumps_double_complex*)lu->val; 589 #else 590 lu->id.a = lu->val; 591 #endif 592 } 593 } 594 break; 595 case 3: /* distributed assembled matrix input (size>1) */ 596 lu->id.nz_loc = nnz; 597 lu->id.irn_loc=lu->irn; lu->id.jcn_loc=lu->jcn; 598 if (lu->id.ICNTL(6)>1) { 599 #if defined(PETSC_USE_COMPLEX) 600 lu->id.a_loc = (mumps_double_complex*)lu->val; 601 #else 602 lu->id.a_loc = lu->val; 603 #endif 604 } 605 break; 606 } 607 lu->id.job=1; 608 #if defined(PETSC_USE_COMPLEX) 609 zmumps_c(&lu->id); 610 #else 611 dmumps_c(&lu->id); 612 #endif 613 if (lu->id.INFOG(1) < 0) { 614 SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",lu->id.INFOG(1)); 615 } 616 } 617 618 /* numerical factorization phase */ 619 if(!lu->id.ICNTL(18)) { 620 if (!lu->myid) { 621 #if defined(PETSC_USE_COMPLEX) 622 lu->id.a = (mumps_double_complex*)lu->val; 623 #else 624 lu->id.a = lu->val; 625 #endif 626 } 627 } else { 628 #if defined(PETSC_USE_COMPLEX) 629 lu->id.a_loc = (mumps_double_complex*)lu->val; 630 #else 631 lu->id.a_loc = lu->val; 632 #endif 633 } 634 lu->id.job=2; 635 #if defined(PETSC_USE_COMPLEX) 636 zmumps_c(&lu->id); 637 #else 638 dmumps_c(&lu->id); 639 #endif 640 if (lu->id.INFOG(1) < 0) { 641 if (lu->id.INFO(1) == -13) { 642 SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: Cannot allocate required memory %d megabytes\n",lu->id.INFO(2)); 643 } else { 644 SETERRQ2(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFO(1)=%d, INFO(2)=%d\n",lu->id.INFO(1),lu->id.INFO(2)); 645 } 646 } 647 648 if (!lu->myid && lu->id.ICNTL(16) > 0){ 649 SETERRQ1(PETSC_ERR_LIB," lu->id.ICNTL(16):=%d\n",lu->id.INFOG(16)); 650 } 651 652 (*F)->assembled = PETSC_TRUE; 653 lu->matstruc = SAME_NONZERO_PATTERN; 654 lu->CleanUpMUMPS = PETSC_TRUE; 655 PetscFunctionReturn(0); 656 } 657 658 /* Note the Petsc r and c permutations are ignored */ 659 #undef __FUNCT__ 660 #define __FUNCT__ "MatLUFactorSymbolic_AIJMUMPS" 661 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) { 662 Mat B; 663 Mat_MUMPS *lu; 664 PetscErrorCode ierr; 665 666 PetscFunctionBegin; 667 668 /* Create the factorization matrix */ 669 ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&B);CHKERRQ(ierr); 670 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 671 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 672 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 673 674 B->ops->lufactornumeric = MatFactorNumeric_AIJMUMPS; 675 B->factor = FACTOR_LU; 676 lu = (Mat_MUMPS*)B->spptr; 677 lu->sym = 0; 678 lu->matstruc = DIFFERENT_NONZERO_PATTERN; 679 680 *F = B; 681 PetscFunctionReturn(0); 682 } 683 684 /* Note the Petsc r permutation is ignored */ 685 #undef __FUNCT__ 686 #define __FUNCT__ "MatCholeskyFactorSymbolic_SBAIJMUMPS" 687 PetscErrorCode MatCholeskyFactorSymbolic_SBAIJMUMPS(Mat A,IS r,MatFactorInfo *info,Mat *F) { 688 Mat B; 689 Mat_MUMPS *lu; 690 PetscErrorCode ierr; 691 692 PetscFunctionBegin; 693 /* Create the factorization matrix */ 694 ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&B);CHKERRQ(ierr); 695 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 696 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 697 ierr = MatMPISBAIJSetPreallocation(B,1,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 698 699 B->ops->choleskyfactornumeric = MatFactorNumeric_AIJMUMPS; 700 B->ops->getinertia = MatGetInertia_SBAIJMUMPS; 701 B->factor = FACTOR_CHOLESKY; 702 lu = (Mat_MUMPS*)B->spptr; 703 lu->sym = 2; 704 lu->matstruc = DIFFERENT_NONZERO_PATTERN; 705 706 *F = B; 707 PetscFunctionReturn(0); 708 } 709 710 #undef __FUNCT__ 711 #define __FUNCT__ "MatAssemblyEnd_AIJMUMPS" 712 PetscErrorCode MatAssemblyEnd_AIJMUMPS(Mat A,MatAssemblyType mode) { 713 PetscErrorCode ierr; 714 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 715 716 PetscFunctionBegin; 717 ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 718 719 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 720 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 721 A->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 722 PetscFunctionReturn(0); 723 } 724 725 EXTERN_C_BEGIN 726 #undef __FUNCT__ 727 #define __FUNCT__ "MatConvert_AIJ_AIJMUMPS" 728 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_AIJ_AIJMUMPS(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat) 729 { 730 PetscErrorCode ierr; 731 PetscMPIInt size; 732 MPI_Comm comm; 733 Mat B=*newmat; 734 Mat_MUMPS *mumps; 735 736 PetscFunctionBegin; 737 if (reuse == MAT_INITIAL_MATRIX) { 738 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 739 } 740 741 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 742 ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr); 743 744 mumps->MatDuplicate = A->ops->duplicate; 745 mumps->MatView = A->ops->view; 746 mumps->MatAssemblyEnd = A->ops->assemblyend; 747 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 748 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 749 mumps->MatDestroy = A->ops->destroy; 750 mumps->specialdestroy = MatDestroy_AIJMUMPS; 751 mumps->CleanUpMUMPS = PETSC_FALSE; 752 mumps->isAIJ = PETSC_TRUE; 753 754 B->spptr = (void*)mumps; 755 B->ops->duplicate = MatDuplicate_MUMPS; 756 B->ops->view = MatView_AIJMUMPS; 757 B->ops->assemblyend = MatAssemblyEnd_AIJMUMPS; 758 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 759 B->ops->destroy = MatDestroy_MUMPS; 760 761 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 762 if (size == 1) { 763 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_aijmumps_C", 764 "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr); 765 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_seqaij_C", 766 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 767 } else { 768 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_aijmumps_C", 769 "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr); 770 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_mpiaij_C", 771 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 772 } 773 774 ierr = PetscLogInfo((0,"MatConvert_AIJ_AIJMUMPS:Using MUMPS for LU factorization and solves.\n"));CHKERRQ(ierr); 775 ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr); 776 *newmat = B; 777 PetscFunctionReturn(0); 778 } 779 EXTERN_C_END 780 781 /*MC 782 MATAIJMUMPS - MATAIJMUMPS = "aijmumps" - A matrix type providing direct solvers (LU) for distributed 783 and sequential matrices via the external package MUMPS. 784 785 If MUMPS is installed (see the manual for instructions 786 on how to declare the existence of external packages), 787 a matrix type can be constructed which invokes MUMPS solvers. 788 After calling MatCreate(...,A), simply call MatSetType(A,MATAIJMUMPS). 789 790 If created with a single process communicator, this matrix type inherits from MATSEQAIJ. 791 Otherwise, this matrix type inherits from MATMPIAIJ. Hence for single process communicators, 792 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 793 for communicators controlling multiple processes. It is recommended that you call both of 794 the above preallocation routines for simplicity. One can also call MatConvert for an inplace 795 conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size) 796 without data copy. 797 798 Options Database Keys: 799 + -mat_type aijmumps - sets the matrix type to "aijmumps" during a call to MatSetFromOptions() 800 . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric 801 . -mat_mumps_icntl_4 <0,1,2,3,4> - print level 802 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide) 803 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide) 804 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T 805 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements 806 . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view 807 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide) 808 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide) 809 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide) 810 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide) 811 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold 812 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement 813 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold 814 815 Level: beginner 816 817 .seealso: MATSBAIJMUMPS 818 M*/ 819 820 EXTERN_C_BEGIN 821 #undef __FUNCT__ 822 #define __FUNCT__ "MatCreate_AIJMUMPS" 823 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_AIJMUMPS(Mat A) 824 { 825 PetscErrorCode ierr; 826 int size; 827 Mat A_diag; 828 MPI_Comm comm; 829 830 PetscFunctionBegin; 831 /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */ 832 /* and AIJMUMPS types */ 833 ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJMUMPS);CHKERRQ(ierr); 834 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 835 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 836 if (size == 1) { 837 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 838 } else { 839 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 840 A_diag = ((Mat_MPIAIJ *)A->data)->A; 841 ierr = MatConvert_AIJ_AIJMUMPS(A_diag,MATAIJMUMPS,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr); 842 } 843 ierr = MatConvert_AIJ_AIJMUMPS(A,MATAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 844 PetscFunctionReturn(0); 845 } 846 EXTERN_C_END 847 848 #undef __FUNCT__ 849 #define __FUNCT__ "MatAssemblyEnd_SBAIJMUMPS" 850 PetscErrorCode MatAssemblyEnd_SBAIJMUMPS(Mat A,MatAssemblyType mode) 851 { 852 PetscErrorCode ierr; 853 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 854 855 PetscFunctionBegin; 856 ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 857 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 858 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 859 A->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SBAIJMUMPS; 860 PetscFunctionReturn(0); 861 } 862 863 EXTERN_C_BEGIN 864 #undef __FUNCT__ 865 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS" 866 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJMUMPS(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 867 { 868 Mat A; 869 Mat_MUMPS *mumps=(Mat_MUMPS*)B->spptr; 870 PetscErrorCode ierr; 871 872 PetscFunctionBegin; 873 /* 874 After performing the MPISBAIJ Preallocation, we need to convert the local diagonal block matrix 875 into MUMPS type so that the block jacobi preconditioner (for example) can use MUMPS. I would 876 like this to be done in the MatCreate routine, but the creation of this inner matrix requires 877 block size info so that PETSc can determine the local size properly. The block size info is set 878 in the preallocation routine. 879 */ 880 ierr = (*mumps->MatPreallocate)(B,bs,d_nz,d_nnz,o_nz,o_nnz); 881 A = ((Mat_MPISBAIJ *)B->data)->A; 882 ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 883 PetscFunctionReturn(0); 884 } 885 EXTERN_C_END 886 887 EXTERN_C_BEGIN 888 #undef __FUNCT__ 889 #define __FUNCT__ "MatConvert_SBAIJ_SBAIJMUMPS" 890 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SBAIJ_SBAIJMUMPS(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat) 891 { 892 PetscErrorCode ierr; 893 PetscMPIInt size; 894 MPI_Comm comm; 895 Mat B=*newmat; 896 Mat_MUMPS *mumps; 897 void (*f)(void); 898 899 PetscFunctionBegin; 900 if (reuse == MAT_INITIAL_MATRIX) { 901 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 902 } 903 904 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 905 ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr); 906 907 mumps->MatDuplicate = A->ops->duplicate; 908 mumps->MatView = A->ops->view; 909 mumps->MatAssemblyEnd = A->ops->assemblyend; 910 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 911 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 912 mumps->MatDestroy = A->ops->destroy; 913 mumps->specialdestroy = MatDestroy_SBAIJMUMPS; 914 mumps->CleanUpMUMPS = PETSC_FALSE; 915 mumps->isAIJ = PETSC_FALSE; 916 917 B->spptr = (void*)mumps; 918 B->ops->duplicate = MatDuplicate_MUMPS; 919 B->ops->view = MatView_AIJMUMPS; 920 B->ops->assemblyend = MatAssemblyEnd_SBAIJMUMPS; 921 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SBAIJMUMPS; 922 B->ops->destroy = MatDestroy_MUMPS; 923 924 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 925 if (size == 1) { 926 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C", 927 "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr); 928 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C", 929 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 930 } else { 931 /* I really don't like needing to know the tag: MatMPISBAIJSetPreallocation_C */ 932 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",&f);CHKERRQ(ierr); 933 if (f) { /* This case should always be true when this routine is called */ 934 mumps->MatPreallocate = (PetscErrorCode (*)(Mat,int,int,int*,int,int*))f; 935 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 936 "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS", 937 MatMPISBAIJSetPreallocation_MPISBAIJMUMPS);CHKERRQ(ierr); 938 } 939 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C", 940 "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr); 941 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C", 942 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 943 } 944 945 ierr = PetscLogInfo((0,"MatConvert_AIJ_AIJMUMPS:Using MUMPS for Cholesky factorization and solves.\n"));CHKERRQ(ierr); 946 ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr); 947 *newmat = B; 948 PetscFunctionReturn(0); 949 } 950 EXTERN_C_END 951 952 #undef __FUNCT__ 953 #define __FUNCT__ "MatDuplicate_MUMPS" 954 PetscErrorCode MatDuplicate_MUMPS(Mat A, MatDuplicateOption op, Mat *M) { 955 PetscErrorCode ierr; 956 Mat_MUMPS *lu=(Mat_MUMPS *)A->spptr; 957 958 PetscFunctionBegin; 959 ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr); 960 ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_MUMPS));CHKERRQ(ierr); 961 PetscFunctionReturn(0); 962 } 963 964 /*MC 965 MATSBAIJMUMPS - MATSBAIJMUMPS = "sbaijmumps" - A symmetric matrix type providing direct solvers (Cholesky) for 966 distributed and sequential matrices via the external package MUMPS. 967 968 If MUMPS is installed (see the manual for instructions 969 on how to declare the existence of external packages), 970 a matrix type can be constructed which invokes MUMPS solvers. 971 After calling MatCreate(...,A), simply call MatSetType(A,MATSBAIJMUMPS). 972 973 If created with a single process communicator, this matrix type inherits from MATSEQSBAIJ. 974 Otherwise, this matrix type inherits from MATMPISBAIJ. Hence for single process communicators, 975 MatSeqSBAIJSetPreallocation is supported, and similarly MatMPISBAIJSetPreallocation is supported 976 for communicators controlling multiple processes. It is recommended that you call both of 977 the above preallocation routines for simplicity. One can also call MatConvert for an inplace 978 conversion to or from the MATSEQSBAIJ or MATMPISBAIJ type (depending on the communicator size) 979 without data copy. 980 981 Options Database Keys: 982 + -mat_type sbaijmumps - sets the matrix type to "sbaijmumps" during a call to MatSetFromOptions() 983 . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric 984 . -mat_mumps_icntl_4 <0,...,4> - print level 985 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide) 986 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide) 987 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T 988 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements 989 . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view 990 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide) 991 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide) 992 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide) 993 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide) 994 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold 995 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement 996 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold 997 998 Level: beginner 999 1000 .seealso: MATAIJMUMPS 1001 M*/ 1002 1003 EXTERN_C_BEGIN 1004 #undef __FUNCT__ 1005 #define __FUNCT__ "MatCreate_SBAIJMUMPS" 1006 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJMUMPS(Mat A) 1007 { 1008 PetscErrorCode ierr; 1009 int size; 1010 1011 PetscFunctionBegin; 1012 /* Change type name before calling MatSetType to force proper construction of SeqSBAIJ or MPISBAIJ */ 1013 /* and SBAIJMUMPS types */ 1014 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJMUMPS);CHKERRQ(ierr); 1015 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 1016 if (size == 1) { 1017 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1018 } else { 1019 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1020 } 1021 ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 1022 PetscFunctionReturn(0); 1023 } 1024 EXTERN_C_END 1025