1 #define PETSCMAT_DLL 2 3 /* 4 Provides an interface to the MUMPS sparse solver 5 */ 6 #include "src/mat/impls/aij/seq/aij.h" 7 #include "src/mat/impls/aij/mpi/mpiaij.h" 8 #include "src/mat/impls/sbaij/seq/sbaij.h" 9 #include "src/mat/impls/sbaij/mpi/mpisbaij.h" 10 11 EXTERN_C_BEGIN 12 #if defined(PETSC_USE_COMPLEX) 13 #include "zmumps_c.h" 14 #else 15 #include "dmumps_c.h" 16 #endif 17 EXTERN_C_END 18 #define JOB_INIT -1 19 #define JOB_END -2 20 /* macros s.t. indices match MUMPS documentation */ 21 #define ICNTL(I) icntl[(I)-1] 22 #define CNTL(I) cntl[(I)-1] 23 #define INFOG(I) infog[(I)-1] 24 #define INFO(I) info[(I)-1] 25 #define RINFOG(I) rinfog[(I)-1] 26 #define RINFO(I) rinfo[(I)-1] 27 28 typedef struct { 29 #if defined(PETSC_USE_COMPLEX) 30 ZMUMPS_STRUC_C id; 31 #else 32 DMUMPS_STRUC_C id; 33 #endif 34 MatStructure matstruc; 35 PetscMPIInt myid,size; 36 int *irn,*jcn,sym; 37 PetscScalar *val; 38 MPI_Comm comm_mumps; 39 40 PetscTruth isAIJ,CleanUpMUMPS; 41 PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*); 42 PetscErrorCode (*MatView)(Mat,PetscViewer); 43 PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType); 44 PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*); 45 PetscErrorCode (*MatCholeskyFactorSymbolic)(Mat,IS,MatFactorInfo*,Mat*); 46 PetscErrorCode (*MatDestroy)(Mat); 47 PetscErrorCode (*specialdestroy)(Mat); 48 PetscErrorCode (*MatPreallocate)(Mat,int,int,int*,int,int*); 49 } Mat_MUMPS; 50 51 EXTERN PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*); 52 EXTERN_C_BEGIN 53 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SBAIJ_SBAIJMUMPS(Mat,MatType,MatReuse,Mat*); 54 EXTERN_C_END 55 /* convert Petsc mpiaij matrix to triples: row[nz], col[nz], val[nz] */ 56 /* 57 input: 58 A - matrix in mpiaij or mpisbaij (bs=1) format 59 shift - 0: C style output triple; 1: Fortran style output triple. 
60 valOnly - FALSE: spaces are allocated and values are set for the triple 61 TRUE: only the values in v array are updated 62 output: 63 nnz - dim of r, c, and v (number of local nonzero entries of A) 64 r, c, v - row and col index, matrix values (matrix triples) 65 */ 66 PetscErrorCode MatConvertToTriples(Mat A,int shift,PetscTruth valOnly,int *nnz,int **r, int **c, PetscScalar **v) { 67 PetscInt *ai, *aj, *bi, *bj, rstart,nz, *garray; 68 PetscErrorCode ierr; 69 PetscInt i,j,jj,jB,irow,m=A->rmap.n,*ajj,*bjj,countA,countB,colA_start,jcol; 70 PetscInt *row,*col; 71 PetscScalar *av, *bv,*val; 72 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 73 74 PetscFunctionBegin; 75 if (mumps->isAIJ){ 76 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 77 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)(mat->A)->data; 78 Mat_SeqAIJ *bb=(Mat_SeqAIJ*)(mat->B)->data; 79 nz = aa->nz + bb->nz; 80 ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap.rstart; 81 garray = mat->garray; 82 av=aa->a; bv=bb->a; 83 84 } else { 85 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data; 86 Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)(mat->A)->data; 87 Mat_SeqBAIJ *bb=(Mat_SeqBAIJ*)(mat->B)->data; 88 if (A->rmap.bs > 1) SETERRQ1(PETSC_ERR_SUP," bs=%d is not supported yet\n", A->rmap.bs); 89 nz = aa->nz + bb->nz; 90 ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap.rstart; 91 garray = mat->garray; 92 av=aa->a; bv=bb->a; 93 } 94 95 if (!valOnly){ 96 ierr = PetscMalloc(nz*sizeof(PetscInt) ,&row);CHKERRQ(ierr); 97 ierr = PetscMalloc(nz*sizeof(PetscInt),&col);CHKERRQ(ierr); 98 ierr = PetscMalloc(nz*sizeof(PetscScalar),&val);CHKERRQ(ierr); 99 *r = row; *c = col; *v = val; 100 } else { 101 row = *r; col = *c; val = *v; 102 } 103 *nnz = nz; 104 105 jj = 0; irow = rstart; 106 for ( i=0; i<m; i++ ) { 107 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 108 countA = ai[i+1] - ai[i]; 109 countB = bi[i+1] - bi[i]; 110 bjj = bj + bi[i]; 111 112 /* get jB, the starting local col index for the 2nd B-part */ 113 colA_start = rstart + ajj[0]; /* the 
smallest col index for A */ 114 j=-1; 115 do { 116 j++; 117 if (j == countB) break; 118 jcol = garray[bjj[j]]; 119 } while (jcol < colA_start); 120 jB = j; 121 122 /* B-part, smaller col index */ 123 colA_start = rstart + ajj[0]; /* the smallest col index for A */ 124 for (j=0; j<jB; j++){ 125 jcol = garray[bjj[j]]; 126 if (!valOnly){ 127 row[jj] = irow + shift; col[jj] = jcol + shift; 128 129 } 130 val[jj++] = *bv++; 131 } 132 /* A-part */ 133 for (j=0; j<countA; j++){ 134 if (!valOnly){ 135 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 136 } 137 val[jj++] = *av++; 138 } 139 /* B-part, larger col index */ 140 for (j=jB; j<countB; j++){ 141 if (!valOnly){ 142 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 143 } 144 val[jj++] = *bv++; 145 } 146 irow++; 147 } 148 149 PetscFunctionReturn(0); 150 } 151 152 EXTERN_C_BEGIN 153 #undef __FUNCT__ 154 #define __FUNCT__ "MatConvert_MUMPS_Base" 155 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MUMPS_Base(Mat A,MatType type,MatReuse reuse,Mat *newmat) \ 156 { 157 PetscErrorCode ierr; 158 Mat B=*newmat; 159 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 160 void (*f)(void); 161 162 PetscFunctionBegin; 163 if (reuse == MAT_INITIAL_MATRIX) { 164 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 165 } 166 B->ops->duplicate = mumps->MatDuplicate; 167 B->ops->view = mumps->MatView; 168 B->ops->assemblyend = mumps->MatAssemblyEnd; 169 B->ops->lufactorsymbolic = mumps->MatLUFactorSymbolic; 170 B->ops->choleskyfactorsymbolic = mumps->MatCholeskyFactorSymbolic; 171 B->ops->destroy = mumps->MatDestroy; 172 173 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(PetscVoidStarFunction)&f);CHKERRQ(ierr); 174 if (f) { 175 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C","",(PetscVoidFunction)mumps->MatPreallocate);CHKERRQ(ierr); 176 } 177 ierr = PetscFree(mumps);CHKERRQ(ierr); 178 179 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_aijmumps_C","",PETSC_NULL);CHKERRQ(ierr); 180 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 181 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_aijmumps_C","",PETSC_NULL);CHKERRQ(ierr); 182 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr); 183 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C","",PETSC_NULL);CHKERRQ(ierr); 184 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 185 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C","",PETSC_NULL);CHKERRQ(ierr); 186 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 187 188 ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr); 189 *newmat = B; 190 PetscFunctionReturn(0); 191 } 192 EXTERN_C_END 193 194 #undef __FUNCT__ 195 #define __FUNCT__ "MatDestroy_MUMPS" 196 PetscErrorCode MatDestroy_MUMPS(Mat A) 197 { 198 Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr; 199 PetscErrorCode ierr; 200 PetscMPIInt size=lu->size; 201 PetscErrorCode (*specialdestroy)(Mat); 202 PetscFunctionBegin; 203 if (lu->CleanUpMUMPS) { 204 /* Terminate instance, deallocate memories */ 205 lu->id.job=JOB_END; 206 #if defined(PETSC_USE_COMPLEX) 207 zmumps_c(&lu->id); 208 #else 209 dmumps_c(&lu->id); 210 #endif 211 ierr = PetscFree(lu->irn);CHKERRQ(ierr); 212 ierr = PetscFree(lu->jcn);CHKERRQ(ierr); 213 if (size>1 && lu->val) { 214 ierr = PetscFree(lu->val);CHKERRQ(ierr); 215 } 216 ierr = MPI_Comm_free(&(lu->comm_mumps));CHKERRQ(ierr); 217 } 218 specialdestroy = lu->specialdestroy; 219 ierr = (*specialdestroy)(A);CHKERRQ(ierr); 220 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 221 PetscFunctionReturn(0); 222 } 223 224 #undef __FUNCT__ 225 #define __FUNCT__ 
"MatDestroy_AIJMUMPS" 226 PetscErrorCode MatDestroy_AIJMUMPS(Mat A) 227 { 228 PetscErrorCode ierr; 229 int size; 230 231 PetscFunctionBegin; 232 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 233 if (size==1) { 234 ierr = MatConvert_MUMPS_Base(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 235 } else { 236 ierr = MatConvert_MUMPS_Base(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 237 } 238 PetscFunctionReturn(0); 239 } 240 241 #undef __FUNCT__ 242 #define __FUNCT__ "MatDestroy_SBAIJMUMPS" 243 PetscErrorCode MatDestroy_SBAIJMUMPS(Mat A) 244 { 245 PetscErrorCode ierr; 246 int size; 247 248 PetscFunctionBegin; 249 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 250 if (size==1) { 251 ierr = MatConvert_MUMPS_Base(A,MATSEQSBAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 252 } else { 253 ierr = MatConvert_MUMPS_Base(A,MATMPISBAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 254 } 255 PetscFunctionReturn(0); 256 } 257 258 #undef __FUNCT__ 259 #define __FUNCT__ "MatFactorInfo_MUMPS" 260 PetscErrorCode MatFactorInfo_MUMPS(Mat A,PetscViewer viewer) { 261 Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr; 262 PetscErrorCode ierr; 263 264 PetscFunctionBegin; 265 ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr); 266 ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",lu->id.sym);CHKERRQ(ierr); 267 ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",lu->id.par);CHKERRQ(ierr); 268 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",lu->id.ICNTL(4));CHKERRQ(ierr); 269 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",lu->id.ICNTL(5));CHKERRQ(ierr); 270 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",lu->id.ICNTL(6));CHKERRQ(ierr); 271 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (matrix ordering): %d \n",lu->id.ICNTL(7));CHKERRQ(ierr); 272 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(9) (A/A^T x=b is solved): %d \n",lu->id.ICNTL(9));CHKERRQ(ierr); 273 ierr = 
PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",lu->id.ICNTL(10));CHKERRQ(ierr); 274 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",lu->id.ICNTL(11));CHKERRQ(ierr); 275 if (!lu->myid && lu->id.ICNTL(11)>0) { 276 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(4) (inf norm of input mat): %g\n",lu->id.RINFOG(4));CHKERRQ(ierr); 277 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(5) (inf norm of solution): %g\n",lu->id.RINFOG(5));CHKERRQ(ierr); 278 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(6) (inf norm of residual): %g\n",lu->id.RINFOG(6));CHKERRQ(ierr); 279 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",lu->id.RINFOG(7),lu->id.RINFOG(8));CHKERRQ(ierr); 280 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(9) (error estimate): %g \n",lu->id.RINFOG(9));CHKERRQ(ierr); 281 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",lu->id.RINFOG(10),lu->id.RINFOG(11));CHKERRQ(ierr); 282 283 } 284 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",lu->id.ICNTL(12));CHKERRQ(ierr); 285 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (efficiency control): %d \n",lu->id.ICNTL(13));CHKERRQ(ierr); 286 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",lu->id.ICNTL(14));CHKERRQ(ierr); 287 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(15) (efficiency control): %d \n",lu->id.ICNTL(15));CHKERRQ(ierr); 288 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",lu->id.ICNTL(18));CHKERRQ(ierr); 289 290 ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",lu->id.CNTL(1));CHKERRQ(ierr); 291 ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",lu->id.CNTL(2));CHKERRQ(ierr); 292 ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",lu->id.CNTL(3));CHKERRQ(ierr); 293 294 /* infomation 
local to each processor */ 295 if (!lu->myid) ierr = PetscPrintf(PETSC_COMM_SELF, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr); 296 ierr = PetscSynchronizedPrintf(A->comm," [%d] %g \n",lu->myid,lu->id.RINFO(1));CHKERRQ(ierr); 297 ierr = PetscSynchronizedFlush(A->comm); 298 if (!lu->myid) ierr = PetscPrintf(PETSC_COMM_SELF, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr); 299 ierr = PetscSynchronizedPrintf(A->comm," [%d] %g \n",lu->myid,lu->id.RINFO(2));CHKERRQ(ierr); 300 ierr = PetscSynchronizedFlush(A->comm); 301 if (!lu->myid) ierr = PetscPrintf(PETSC_COMM_SELF, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr); 302 ierr = PetscSynchronizedPrintf(A->comm," [%d] %g \n",lu->myid,lu->id.RINFO(3));CHKERRQ(ierr); 303 ierr = PetscSynchronizedFlush(A->comm); 304 305 if (!lu->myid){ /* information from the host */ 306 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",lu->id.RINFOG(1));CHKERRQ(ierr); 307 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",lu->id.RINFOG(2));CHKERRQ(ierr); 308 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",lu->id.RINFOG(3));CHKERRQ(ierr); 309 310 ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(3));CHKERRQ(ierr); 311 ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(4));CHKERRQ(ierr); 312 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",lu->id.INFOG(5));CHKERRQ(ierr); 313 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d 
\n",lu->id.INFOG(6));CHKERRQ(ierr); 314 ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively uese after analysis): %d \n",lu->id.INFOG(7));CHKERRQ(ierr); 315 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",lu->id.INFOG(8));CHKERRQ(ierr); 316 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real space store the matrix factors after analysis): %d \n",lu->id.INFOG(9));CHKERRQ(ierr); 317 ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after analysis): %d \n",lu->id.INFOG(10));CHKERRQ(ierr); 318 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix): %d \n",lu->id.INFOG(11));CHKERRQ(ierr); 319 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",lu->id.INFOG(12));CHKERRQ(ierr); 320 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",lu->id.INFOG(13));CHKERRQ(ierr); 321 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",lu->id.INFOG(14));CHKERRQ(ierr); 322 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",lu->id.INFOG(15));CHKERRQ(ierr); 323 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in million of bytes) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",lu->id.INFOG(16));CHKERRQ(ierr); 324 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",lu->id.INFOG(17));CHKERRQ(ierr); 325 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",lu->id.INFOG(18));CHKERRQ(ierr); 326 ierr = 
PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",lu->id.INFOG(19));CHKERRQ(ierr); 327 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",lu->id.INFOG(20));CHKERRQ(ierr); 328 } 329 330 PetscFunctionReturn(0); 331 } 332 333 #undef __FUNCT__ 334 #define __FUNCT__ "MatView_MUMPS" 335 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) { 336 PetscErrorCode ierr; 337 PetscTruth iascii; 338 PetscViewerFormat format; 339 Mat_MUMPS *mumps=(Mat_MUMPS*)(A->spptr); 340 341 PetscFunctionBegin; 342 ierr = (*mumps->MatView)(A,viewer);CHKERRQ(ierr); 343 344 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 345 if (iascii) { 346 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 347 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 348 ierr = MatFactorInfo_MUMPS(A,viewer);CHKERRQ(ierr); 349 } 350 } 351 PetscFunctionReturn(0); 352 } 353 354 #undef __FUNCT__ 355 #define __FUNCT__ "MatSolve_AIJMUMPS" 356 PetscErrorCode MatSolve_AIJMUMPS(Mat A,Vec b,Vec x) { 357 Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr; 358 PetscScalar *array; 359 Vec x_seq; 360 IS iden; 361 VecScatter scat; 362 PetscErrorCode ierr; 363 364 PetscFunctionBegin; 365 if (lu->size > 1){ 366 if (!lu->myid){ 367 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap.N,&x_seq);CHKERRQ(ierr); 368 ierr = ISCreateStride(PETSC_COMM_SELF,A->rmap.N,0,1,&iden);CHKERRQ(ierr); 369 } else { 370 ierr = VecCreateSeq(PETSC_COMM_SELF,0,&x_seq);CHKERRQ(ierr); 371 ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&iden);CHKERRQ(ierr); 372 } 373 ierr = VecScatterCreate(b,iden,x_seq,iden,&scat);CHKERRQ(ierr); 374 ierr = ISDestroy(iden);CHKERRQ(ierr); 375 376 ierr = VecScatterBegin(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 377 ierr = VecScatterEnd(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 378 if (!lu->myid) {ierr = 
VecGetArray(x_seq,&array);CHKERRQ(ierr);} 379 } else { /* size == 1 */ 380 ierr = VecCopy(b,x);CHKERRQ(ierr); 381 ierr = VecGetArray(x,&array);CHKERRQ(ierr); 382 } 383 if (!lu->myid) { /* define rhs on the host */ 384 #if defined(PETSC_USE_COMPLEX) 385 lu->id.rhs = (mumps_double_complex*)array; 386 #else 387 lu->id.rhs = array; 388 #endif 389 } 390 391 /* solve phase */ 392 lu->id.job=3; 393 #if defined(PETSC_USE_COMPLEX) 394 zmumps_c(&lu->id); 395 #else 396 dmumps_c(&lu->id); 397 #endif 398 if (lu->id.INFOG(1) < 0) { 399 SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",lu->id.INFOG(1)); 400 } 401 402 /* convert mumps solution x_seq to petsc mpi x */ 403 if (lu->size > 1) { 404 if (!lu->myid){ 405 ierr = VecRestoreArray(x_seq,&array);CHKERRQ(ierr); 406 } 407 ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 408 ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 409 ierr = VecScatterDestroy(scat);CHKERRQ(ierr); 410 ierr = VecDestroy(x_seq);CHKERRQ(ierr); 411 } else { 412 ierr = VecRestoreArray(x,&array);CHKERRQ(ierr); 413 } 414 415 PetscFunctionReturn(0); 416 } 417 418 /* 419 input: 420 F: numeric factor 421 output: 422 nneg: total number of negative pivots 423 nzero: 0 424 npos: (global dimension of F) - nneg 425 */ 426 427 #undef __FUNCT__ 428 #define __FUNCT__ "MatGetInertia_SBAIJMUMPS" 429 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos) 430 { 431 Mat_MUMPS *lu =(Mat_MUMPS*)F->spptr; 432 PetscErrorCode ierr; 433 PetscMPIInt size; 434 435 PetscFunctionBegin; 436 ierr = MPI_Comm_size(F->comm,&size);CHKERRQ(ierr); 437 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 438 if (size > 1 && lu->id.ICNTL(13) != 1){ 439 SETERRQ1(PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. 
-mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",lu->id.INFOG(13)); 440 } 441 if (nneg){ 442 if (!lu->myid){ 443 *nneg = lu->id.INFOG(12); 444 } 445 ierr = MPI_Bcast(nneg,1,MPI_INT,0,lu->comm_mumps);CHKERRQ(ierr); 446 } 447 if (nzero) *nzero = 0; 448 if (npos) *npos = F->rmap.N - (*nneg); 449 PetscFunctionReturn(0); 450 } 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatFactorNumeric_AIJMUMPS" 454 PetscErrorCode MatFactorNumeric_AIJMUMPS(Mat A,MatFactorInfo *info,Mat *F) 455 { 456 Mat_MUMPS *lu =(Mat_MUMPS*)(*F)->spptr; 457 Mat_MUMPS *lua=(Mat_MUMPS*)(A)->spptr; 458 PetscErrorCode ierr; 459 PetscInt rnz,nnz,nz=0,i,M=A->rmap.N,*ai,*aj,icntl; 460 PetscTruth valOnly,flg; 461 Mat F_diag; 462 463 PetscFunctionBegin; 464 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ 465 (*F)->ops->solve = MatSolve_AIJMUMPS; 466 467 /* Initialize a MUMPS instance */ 468 ierr = MPI_Comm_rank(A->comm, &lu->myid); 469 ierr = MPI_Comm_size(A->comm,&lu->size);CHKERRQ(ierr); 470 lua->myid = lu->myid; lua->size = lu->size; 471 lu->id.job = JOB_INIT; 472 ierr = MPI_Comm_dup(A->comm,&(lu->comm_mumps));CHKERRQ(ierr); 473 ierr = MPICCommToFortranComm(lu->comm_mumps,&(lu->id.comm_fortran));CHKERRQ(ierr); 474 475 /* Set mumps options */ 476 ierr = PetscOptionsBegin(A->comm,A->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 477 lu->id.par=1; /* host participates factorizaton and solve */ 478 lu->id.sym=lu->sym; 479 if (lu->sym == 2){ 480 ierr = PetscOptionsInt("-mat_mumps_sym","SYM: (1,2)","None",lu->id.sym,&icntl,&flg);CHKERRQ(ierr); 481 if (flg && icntl == 1) lu->id.sym=icntl; /* matrix is spd */ 482 } 483 #if defined(PETSC_USE_COMPLEX) 484 zmumps_c(&lu->id); 485 #else 486 dmumps_c(&lu->id); 487 #endif 488 489 if (lu->size == 1){ 490 lu->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 491 } else { 492 lu->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 493 } 494 495 icntl=-1; 496 ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing 
(0 to 4)","None",lu->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr); 497 if ((flg && icntl > 0) || PetscLogPrintInfo) { 498 lu->id.ICNTL(4)=icntl; /* and use mumps default icntl(i), i=1,2,3 */ 499 } else { /* no output */ 500 lu->id.ICNTL(1) = 0; /* error message, default= 6 */ 501 lu->id.ICNTL(2) = -1; /* output stream for diagnostic printing, statistics, and warning. default=0 */ 502 lu->id.ICNTL(3) = -1; /* output stream for global information, default=6 */ 503 lu->id.ICNTL(4) = 0; /* level of printing, 0,1,2,3,4, default=2 */ 504 } 505 ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): matrix prescaling (0 to 7)","None",lu->id.ICNTL(6),&lu->id.ICNTL(6),PETSC_NULL);CHKERRQ(ierr); 506 icntl=-1; 507 ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): matrix ordering (0 to 7)","None",lu->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr); 508 if (flg) { 509 if (icntl== 1){ 510 SETERRQ(PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n"); 511 } else { 512 lu->id.ICNTL(7) = icntl; 513 } 514 } 515 ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): A or A^T x=b to be solved. 
1: A; otherwise: A^T","None",lu->id.ICNTL(9),&lu->id.ICNTL(9),PETSC_NULL);CHKERRQ(ierr); 516 ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",lu->id.ICNTL(10),&lu->id.ICNTL(10),PETSC_NULL);CHKERRQ(ierr); 517 ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): error analysis, a positive value returns statistics (by -ksp_view)","None",lu->id.ICNTL(11),&lu->id.ICNTL(11),PETSC_NULL);CHKERRQ(ierr); 518 ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): efficiency control","None",lu->id.ICNTL(12),&lu->id.ICNTL(12),PETSC_NULL);CHKERRQ(ierr); 519 ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): efficiency control","None",lu->id.ICNTL(13),&lu->id.ICNTL(13),PETSC_NULL);CHKERRQ(ierr); 520 ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage of estimated workspace increase","None",lu->id.ICNTL(14),&lu->id.ICNTL(14),PETSC_NULL);CHKERRQ(ierr); 521 ierr = PetscOptionsInt("-mat_mumps_icntl_15","ICNTL(15): efficiency control","None",lu->id.ICNTL(15),&lu->id.ICNTL(15),PETSC_NULL);CHKERRQ(ierr); 522 523 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",lu->id.CNTL(1),&lu->id.CNTL(1),PETSC_NULL);CHKERRQ(ierr); 524 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",lu->id.CNTL(2),&lu->id.CNTL(2),PETSC_NULL);CHKERRQ(ierr); 525 ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",lu->id.CNTL(3),&lu->id.CNTL(3),PETSC_NULL);CHKERRQ(ierr); 526 PetscOptionsEnd(); 527 } 528 529 /* define matrix A */ 530 switch (lu->id.ICNTL(18)){ 531 case 0: /* centralized assembled matrix input (size=1) */ 532 if (!lu->myid) { 533 if (lua->isAIJ){ 534 Mat_SeqAIJ *aa = (Mat_SeqAIJ*)A->data; 535 nz = aa->nz; 536 ai = aa->i; aj = aa->j; lu->val = aa->a; 537 } else { 538 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)A->data; 539 nz = aa->nz; 540 ai = aa->i; aj = aa->j; lu->val = aa->a; 541 } 542 if (lu->matstruc == 
DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization, get irn and jcn */ 543 ierr = PetscMalloc(nz*sizeof(PetscInt),&lu->irn);CHKERRQ(ierr); 544 ierr = PetscMalloc(nz*sizeof(PetscInt),&lu->jcn);CHKERRQ(ierr); 545 nz = 0; 546 for (i=0; i<M; i++){ 547 rnz = ai[i+1] - ai[i]; 548 while (rnz--) { /* Fortran row/col index! */ 549 lu->irn[nz] = i+1; lu->jcn[nz] = (*aj)+1; aj++; nz++; 550 } 551 } 552 } 553 } 554 break; 555 case 3: /* distributed assembled matrix input (size>1) */ 556 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ 557 valOnly = PETSC_FALSE; 558 } else { 559 valOnly = PETSC_TRUE; /* only update mat values, not row and col index */ 560 } 561 ierr = MatConvertToTriples(A,1,valOnly, &nnz, &lu->irn, &lu->jcn, &lu->val);CHKERRQ(ierr); 562 break; 563 default: SETERRQ(PETSC_ERR_SUP,"Matrix input format is not supported by MUMPS."); 564 } 565 566 /* analysis phase */ 567 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ 568 lu->id.n = M; 569 switch (lu->id.ICNTL(18)){ 570 case 0: /* centralized assembled matrix input */ 571 if (!lu->myid) { 572 lu->id.nz =nz; lu->id.irn=lu->irn; lu->id.jcn=lu->jcn; 573 if (lu->id.ICNTL(6)>1){ 574 #if defined(PETSC_USE_COMPLEX) 575 lu->id.a = (mumps_double_complex*)lu->val; 576 #else 577 lu->id.a = lu->val; 578 #endif 579 } 580 } 581 break; 582 case 3: /* distributed assembled matrix input (size>1) */ 583 lu->id.nz_loc = nnz; 584 lu->id.irn_loc=lu->irn; lu->id.jcn_loc=lu->jcn; 585 if (lu->id.ICNTL(6)>1) { 586 #if defined(PETSC_USE_COMPLEX) 587 lu->id.a_loc = (mumps_double_complex*)lu->val; 588 #else 589 lu->id.a_loc = lu->val; 590 #endif 591 } 592 break; 593 } 594 lu->id.job=1; 595 #if defined(PETSC_USE_COMPLEX) 596 zmumps_c(&lu->id); 597 #else 598 dmumps_c(&lu->id); 599 #endif 600 if (lu->id.INFOG(1) < 0) { 601 SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",lu->id.INFOG(1)); 602 } 603 } 604 605 /* numerical factorization phase */ 606 if(!lu->id.ICNTL(18)) { 607 if (!lu->myid) { 608 #if 
defined(PETSC_USE_COMPLEX) 609 lu->id.a = (mumps_double_complex*)lu->val; 610 #else 611 lu->id.a = lu->val; 612 #endif 613 } 614 } else { 615 #if defined(PETSC_USE_COMPLEX) 616 lu->id.a_loc = (mumps_double_complex*)lu->val; 617 #else 618 lu->id.a_loc = lu->val; 619 #endif 620 } 621 lu->id.job=2; 622 #if defined(PETSC_USE_COMPLEX) 623 zmumps_c(&lu->id); 624 #else 625 dmumps_c(&lu->id); 626 #endif 627 if (lu->id.INFOG(1) < 0) { 628 if (lu->id.INFO(1) == -13) { 629 SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: Cannot allocate required memory %d megabytes\n",lu->id.INFO(2)); 630 } else { 631 SETERRQ2(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFO(1)=%d, INFO(2)=%d\n",lu->id.INFO(1),lu->id.INFO(2)); 632 } 633 } 634 635 if (!lu->myid && lu->id.ICNTL(16) > 0){ 636 SETERRQ1(PETSC_ERR_LIB," lu->id.ICNTL(16):=%d\n",lu->id.INFOG(16)); 637 } 638 639 if (lu->size > 1){ 640 if ((*F)->factor == FACTOR_LU){ 641 F_diag = ((Mat_MPIAIJ *)(*F)->data)->A; 642 } else { 643 F_diag = ((Mat_MPISBAIJ *)(*F)->data)->A; 644 } 645 F_diag->assembled = PETSC_TRUE; 646 } 647 (*F)->assembled = PETSC_TRUE; 648 lu->matstruc = SAME_NONZERO_PATTERN; 649 lu->CleanUpMUMPS = PETSC_TRUE; 650 PetscFunctionReturn(0); 651 } 652 653 /* Note the Petsc r and c permutations are ignored */ 654 #undef __FUNCT__ 655 #define __FUNCT__ "MatLUFactorSymbolic_AIJMUMPS" 656 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) { 657 Mat B; 658 Mat_MUMPS *lu; 659 PetscErrorCode ierr; 660 661 PetscFunctionBegin; 662 /* Create the factorization matrix */ 663 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 664 ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr); 665 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 666 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 667 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 668 669 B->ops->lufactornumeric = 
MatFactorNumeric_AIJMUMPS; 670 B->factor = FACTOR_LU; 671 lu = (Mat_MUMPS*)B->spptr; 672 lu->sym = 0; 673 lu->matstruc = DIFFERENT_NONZERO_PATTERN; 674 675 *F = B; 676 PetscFunctionReturn(0); 677 } 678 679 /* Note the Petsc r permutation is ignored */ 680 #undef __FUNCT__ 681 #define __FUNCT__ "MatCholeskyFactorSymbolic_SBAIJMUMPS" 682 PetscErrorCode MatCholeskyFactorSymbolic_SBAIJMUMPS(Mat A,IS r,MatFactorInfo *info,Mat *F) { 683 Mat B; 684 Mat_MUMPS *lu; 685 PetscErrorCode ierr; 686 687 PetscFunctionBegin; 688 /* Create the factorization matrix */ 689 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 690 ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr); 691 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 692 ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr); 693 ierr = MatMPISBAIJSetPreallocation(B,1,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 694 695 B->ops->choleskyfactornumeric = MatFactorNumeric_AIJMUMPS; 696 B->ops->getinertia = MatGetInertia_SBAIJMUMPS; 697 B->factor = FACTOR_CHOLESKY; 698 lu = (Mat_MUMPS*)B->spptr; 699 lu->sym = 2; 700 lu->matstruc = DIFFERENT_NONZERO_PATTERN; 701 702 *F = B; 703 PetscFunctionReturn(0); 704 } 705 706 #undef __FUNCT__ 707 #define __FUNCT__ "MatAssemblyEnd_AIJMUMPS" 708 PetscErrorCode MatAssemblyEnd_AIJMUMPS(Mat A,MatAssemblyType mode) { 709 PetscErrorCode ierr; 710 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 711 712 PetscFunctionBegin; 713 ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 714 715 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 716 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 717 A->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 718 PetscFunctionReturn(0); 719 } 720 721 EXTERN_C_BEGIN 722 #undef __FUNCT__ 723 #define __FUNCT__ "MatConvert_AIJ_AIJMUMPS" 724 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_AIJ_AIJMUMPS(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 725 { 726 PetscErrorCode ierr; 727 PetscMPIInt size; 728 MPI_Comm comm; 729 
Mat B=*newmat; 730 Mat_MUMPS *mumps; 731 732 PetscFunctionBegin; 733 if (reuse == MAT_INITIAL_MATRIX) { 734 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 735 } 736 737 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 738 ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr); 739 740 mumps->MatDuplicate = A->ops->duplicate; 741 mumps->MatView = A->ops->view; 742 mumps->MatAssemblyEnd = A->ops->assemblyend; 743 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 744 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 745 mumps->MatDestroy = A->ops->destroy; 746 mumps->specialdestroy = MatDestroy_AIJMUMPS; 747 mumps->CleanUpMUMPS = PETSC_FALSE; 748 mumps->isAIJ = PETSC_TRUE; 749 750 B->spptr = (void*)mumps; 751 B->ops->duplicate = MatDuplicate_MUMPS; 752 B->ops->view = MatView_MUMPS; 753 B->ops->assemblyend = MatAssemblyEnd_AIJMUMPS; 754 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 755 B->ops->destroy = MatDestroy_MUMPS; 756 757 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 758 if (size == 1) { 759 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_aijmumps_C", 760 "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr); 761 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_seqaij_C", 762 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 763 } else { 764 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_aijmumps_C", 765 "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr); 766 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_mpiaij_C", 767 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 768 } 769 770 ierr = PetscInfo(0,"Using MUMPS for LU factorization and solves.\n");CHKERRQ(ierr); 771 ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr); 772 *newmat = B; 773 PetscFunctionReturn(0); 774 } 775 EXTERN_C_END 776 777 /*MC 778 MATAIJMUMPS - 
MATAIJMUMPS = "aijmumps" - A matrix type providing direct solvers (LU) for distributed
  and sequential matrices via the external package MUMPS.

  If MUMPS is installed (see the manual for instructions
  on how to declare the existence of external packages),
  a matrix type can be constructed which invokes MUMPS solvers.
  After calling MatCreate(...,A), simply call MatSetType(A,MATAIJMUMPS).

  If created with a single process communicator, this matrix type inherits from MATSEQAIJ.
  Otherwise, this matrix type inherits from MATMPIAIJ. Hence for single process communicators,
  MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
  for communicators controlling multiple processes. It is recommended that you call both of
  the above preallocation routines for simplicity. One can also call MatConvert for an in-place
  conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
  without data copy.

  Options Database Keys:
+ -mat_type aijmumps - sets the matrix type to "aijmumps" during a call to MatSetFromOptions()
. -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric
. -mat_mumps_icntl_4 <0,...,4> - print level
. -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide)
. -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide)
. -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T
. -mat_mumps_icntl_10 <n> - maximum number of iterative refinements
. -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view
. -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide)
. -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide)
. -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide)
.
-mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide) 807 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold 808 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement 809 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold 810 811 Level: beginner 812 813 .seealso: MATSBAIJMUMPS 814 M*/ 815 816 EXTERN_C_BEGIN 817 #undef __FUNCT__ 818 #define __FUNCT__ "MatCreate_AIJMUMPS" 819 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_AIJMUMPS(Mat A) 820 { 821 PetscErrorCode ierr; 822 PetscMPIInt size; 823 MPI_Comm comm; 824 825 PetscFunctionBegin; 826 /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */ 827 /* and AIJMUMPS types */ 828 ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJMUMPS);CHKERRQ(ierr); 829 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 830 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 831 if (size == 1) { 832 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 833 } else { 834 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 835 /* 836 Mat A_diag = ((Mat_MPIAIJ *)A->data)->A; 837 ierr = MatConvert_AIJ_AIJMUMPS(A_diag,MATAIJMUMPS,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr); 838 */ 839 } 840 ierr = MatConvert_AIJ_AIJMUMPS(A,MATAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 841 PetscFunctionReturn(0); 842 } 843 EXTERN_C_END 844 845 #undef __FUNCT__ 846 #define __FUNCT__ "MatAssemblyEnd_SBAIJMUMPS" 847 PetscErrorCode MatAssemblyEnd_SBAIJMUMPS(Mat A,MatAssemblyType mode) 848 { 849 PetscErrorCode ierr; 850 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 851 852 PetscFunctionBegin; 853 ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 854 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 855 A->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SBAIJMUMPS; 856 PetscFunctionReturn(0); 857 } 858 859 EXTERN_C_BEGIN 860 #undef __FUNCT__ 861 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS" 862 PetscErrorCode PETSCMAT_DLLEXPORT 
MatMPISBAIJSetPreallocation_MPISBAIJMUMPS(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 863 { 864 Mat A; 865 Mat_MUMPS *mumps=(Mat_MUMPS*)B->spptr; 866 PetscErrorCode ierr; 867 868 PetscFunctionBegin; 869 /* 870 After performing the MPISBAIJ Preallocation, we need to convert the local diagonal block matrix 871 into MUMPS type so that the block jacobi preconditioner (for example) can use MUMPS. I would 872 like this to be done in the MatCreate routine, but the creation of this inner matrix requires 873 block size info so that PETSc can determine the local size properly. The block size info is set 874 in the preallocation routine. 875 */ 876 ierr = (*mumps->MatPreallocate)(B,bs,d_nz,d_nnz,o_nz,o_nnz); 877 A = ((Mat_MPISBAIJ *)B->data)->A; 878 ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 879 PetscFunctionReturn(0); 880 } 881 EXTERN_C_END 882 883 EXTERN_C_BEGIN 884 #undef __FUNCT__ 885 #define __FUNCT__ "MatConvert_SBAIJ_SBAIJMUMPS" 886 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SBAIJ_SBAIJMUMPS(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 887 { 888 PetscErrorCode ierr; 889 PetscMPIInt size; 890 MPI_Comm comm; 891 Mat B=*newmat; 892 Mat_MUMPS *mumps; 893 void (*f)(void); 894 895 PetscFunctionBegin; 896 if (reuse == MAT_INITIAL_MATRIX) { 897 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 898 } 899 900 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 901 ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr); 902 903 mumps->MatDuplicate = A->ops->duplicate; 904 mumps->MatView = A->ops->view; 905 mumps->MatAssemblyEnd = A->ops->assemblyend; 906 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 907 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 908 mumps->MatDestroy = A->ops->destroy; 909 mumps->specialdestroy = MatDestroy_SBAIJMUMPS; 910 mumps->CleanUpMUMPS = PETSC_FALSE; 911 mumps->isAIJ = PETSC_FALSE; 912 913 B->spptr = (void*)mumps; 914 
B->ops->duplicate = MatDuplicate_MUMPS; 915 B->ops->view = MatView_MUMPS; 916 B->ops->assemblyend = MatAssemblyEnd_SBAIJMUMPS; 917 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SBAIJMUMPS; 918 B->ops->destroy = MatDestroy_MUMPS; 919 920 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 921 if (size == 1) { 922 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C", 923 "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr); 924 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C", 925 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 926 } else { 927 /* I really don't like needing to know the tag: MatMPISBAIJSetPreallocation_C */ 928 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(PetscVoidStarFunction)&f);CHKERRQ(ierr); 929 if (f) { /* This case should always be true when this routine is called */ 930 mumps->MatPreallocate = (PetscErrorCode (*)(Mat,int,int,int*,int,int*))f; 931 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 932 "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS", 933 MatMPISBAIJSetPreallocation_MPISBAIJMUMPS);CHKERRQ(ierr); 934 } 935 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C", 936 "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr); 937 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C", 938 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 939 } 940 941 ierr = PetscInfo(0,"Using MUMPS for Cholesky factorization and solves.\n");CHKERRQ(ierr); 942 ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr); 943 *newmat = B; 944 PetscFunctionReturn(0); 945 } 946 EXTERN_C_END 947 948 #undef __FUNCT__ 949 #define __FUNCT__ "MatDuplicate_MUMPS" 950 PetscErrorCode MatDuplicate_MUMPS(Mat A, MatDuplicateOption op, Mat *M) { 951 
PetscErrorCode ierr; 952 Mat_MUMPS *lu=(Mat_MUMPS *)A->spptr; 953 954 PetscFunctionBegin; 955 ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr); 956 ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_MUMPS));CHKERRQ(ierr); 957 PetscFunctionReturn(0); 958 } 959 960 /*MC 961 MATSBAIJMUMPS - MATSBAIJMUMPS = "sbaijmumps" - A symmetric matrix type providing direct solvers (Cholesky) for 962 distributed and sequential matrices via the external package MUMPS. 963 964 If MUMPS is installed (see the manual for instructions 965 on how to declare the existence of external packages), 966 a matrix type can be constructed which invokes MUMPS solvers. 967 After calling MatCreate(...,A), simply call MatSetType(A,MATSBAIJMUMPS). 968 969 If created with a single process communicator, this matrix type inherits from MATSEQSBAIJ. 970 Otherwise, this matrix type inherits from MATMPISBAIJ. Hence for single process communicators, 971 MatSeqSBAIJSetPreallocation is supported, and similarly MatMPISBAIJSetPreallocation is supported 972 for communicators controlling multiple processes. It is recommended that you call both of 973 the above preallocation routines for simplicity. One can also call MatConvert for an inplace 974 conversion to or from the MATSEQSBAIJ or MATMPISBAIJ type (depending on the communicator size) 975 without data copy. 976 977 Options Database Keys: 978 + -mat_type sbaijmumps - sets the matrix type to "sbaijmumps" during a call to MatSetFromOptions() 979 . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric 980 . -mat_mumps_icntl_4 <0,...,4> - print level 981 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide) 982 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide) 983 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T 984 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements 985 . 
-mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view 986 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide) 987 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide) 988 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide) 989 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide) 990 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold 991 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement 992 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold 993 994 Level: beginner 995 996 .seealso: MATAIJMUMPS 997 M*/ 998 999 EXTERN_C_BEGIN 1000 #undef __FUNCT__ 1001 #define __FUNCT__ "MatCreate_SBAIJMUMPS" 1002 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJMUMPS(Mat A) 1003 { 1004 PetscErrorCode ierr; 1005 PetscMPIInt size; 1006 1007 PetscFunctionBegin; 1008 /* Change type name before calling MatSetType to force proper construction of SeqSBAIJ or MPISBAIJ */ 1009 /* and SBAIJMUMPS types */ 1010 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJMUMPS);CHKERRQ(ierr); 1011 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 1012 if (size == 1) { 1013 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1014 } else { 1015 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1016 } 1017 ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 1018 PetscFunctionReturn(0); 1019 } 1020 EXTERN_C_END 1021