#define PETSCMAT_DLL

/*
   Provides an interface to the MUMPS sparse solver
*/
#include "src/mat/impls/aij/seq/aij.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/mat/impls/sbaij/seq/sbaij.h"
#include "src/mat/impls/sbaij/mpi/mpisbaij.h"

EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
#include "zmumps_c.h"
#else
#include "dmumps_c.h"
#endif
EXTERN_C_END
/* MUMPS job codes: initialize an instance / terminate an instance */
#define JOB_INIT -1
#define JOB_END -2
/* macros s.t. indices match MUMPS documentation (MUMPS arrays are 1-based) */
#define ICNTL(I) icntl[(I)-1]
#define CNTL(I) cntl[(I)-1]
#define INFOG(I) infog[(I)-1]
#define INFO(I) info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I) rinfo[(I)-1]

/* Private data hung off A->spptr for the aijmumps/sbaijmumps matrix types */
typedef struct {
#if defined(PETSC_USE_COMPLEX)
  ZMUMPS_STRUC_C id;   /* MUMPS control/data structure (complex scalars) */
#else
  DMUMPS_STRUC_C id;   /* MUMPS control/data structure (real scalars) */
#endif
  MatStructure matstruc;    /* DIFFERENT_NONZERO_PATTERN forces a new analysis phase; SAME_ reuses it */
  PetscMPIInt myid,size;    /* rank and size of the matrix communicator */
  int *irn,*jcn,sym;        /* triple row/col indices handed to MUMPS; sym: 0=unsymmetric, 1=spd, 2=symmetric */
  PetscScalar *val;         /* triple values handed to MUMPS */
  MPI_Comm comm_mumps;      /* duplicated communicator passed to MUMPS (freed in MatDestroy_MUMPS) */

  PetscTruth isAIJ,CleanUpMUMPS;  /* CleanUpMUMPS: a factorization exists, JOB_END/frees needed on destroy */
  /* saved methods of the base (Seq/MPI AIJ/SBAIJ) type, restored by MatConvert_MUMPS_Base */
  PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
  PetscErrorCode (*MatView)(Mat,PetscViewer);
  PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
  PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
  PetscErrorCode (*MatCholeskyFactorSymbolic)(Mat,IS,MatFactorInfo*,Mat*);
  PetscErrorCode (*MatDestroy)(Mat);
  PetscErrorCode (*specialdestroy)(Mat);   /* type-specific destroy (AIJ or SBAIJ flavor), run before base destroy */
  PetscErrorCode (*MatPreallocate)(Mat,int,int,int*,int,int*);
} Mat_MUMPS;

EXTERN PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*);
EXTERN_C_BEGIN
PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SBAIJ_SBAIJMUMPS(Mat,MatType,MatReuse,Mat*);
EXTERN_C_END
/* convert Petsc mpiaij matrix to triples: row[nz], col[nz], val[nz] */
/*
 input:
   A       - matrix in mpiaij or mpisbaij (bs=1) format
   shift   - 0: C style output triple; 1: Fortran style output triple. 
60 valOnly - FALSE: spaces are allocated and values are set for the triple 61 TRUE: only the values in v array are updated 62 output: 63 nnz - dim of r, c, and v (number of local nonzero entries of A) 64 r, c, v - row and col index, matrix values (matrix triples) 65 */ 66 PetscErrorCode MatConvertToTriples(Mat A,int shift,PetscTruth valOnly,int *nnz,int **r, int **c, PetscScalar **v) { 67 PetscInt *ai, *aj, *bi, *bj, rstart,nz, *garray; 68 PetscErrorCode ierr; 69 PetscInt i,j,jj,jB,irow,m=A->rmap.n,*ajj,*bjj,countA,countB,colA_start,jcol; 70 PetscInt *row,*col; 71 PetscScalar *av, *bv,*val; 72 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 73 74 PetscFunctionBegin; 75 if (mumps->isAIJ){ 76 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 77 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)(mat->A)->data; 78 Mat_SeqAIJ *bb=(Mat_SeqAIJ*)(mat->B)->data; 79 nz = aa->nz + bb->nz; 80 ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap.rstart; 81 garray = mat->garray; 82 av=aa->a; bv=bb->a; 83 84 } else { 85 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data; 86 Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)(mat->A)->data; 87 Mat_SeqBAIJ *bb=(Mat_SeqBAIJ*)(mat->B)->data; 88 if (A->rmap.bs > 1) SETERRQ1(PETSC_ERR_SUP," bs=%d is not supported yet\n", A->rmap.bs); 89 nz = aa->nz + bb->nz; 90 ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap.rstart; 91 garray = mat->garray; 92 av=aa->a; bv=bb->a; 93 } 94 95 if (!valOnly){ 96 ierr = PetscMalloc(nz*sizeof(PetscInt) ,&row);CHKERRQ(ierr); 97 ierr = PetscMalloc(nz*sizeof(PetscInt),&col);CHKERRQ(ierr); 98 ierr = PetscMalloc(nz*sizeof(PetscScalar),&val);CHKERRQ(ierr); 99 *r = row; *c = col; *v = val; 100 } else { 101 row = *r; col = *c; val = *v; 102 } 103 *nnz = nz; 104 105 jj = 0; irow = rstart; 106 for ( i=0; i<m; i++ ) { 107 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 108 countA = ai[i+1] - ai[i]; 109 countB = bi[i+1] - bi[i]; 110 bjj = bj + bi[i]; 111 112 /* get jB, the starting local col index for the 2nd B-part */ 113 colA_start = rstart + ajj[0]; /* the 
smallest col index for A */ 114 j=-1; 115 do { 116 j++; 117 if (j == countB) break; 118 jcol = garray[bjj[j]]; 119 } while (jcol < colA_start); 120 jB = j; 121 122 /* B-part, smaller col index */ 123 colA_start = rstart + ajj[0]; /* the smallest col index for A */ 124 for (j=0; j<jB; j++){ 125 jcol = garray[bjj[j]]; 126 if (!valOnly){ 127 row[jj] = irow + shift; col[jj] = jcol + shift; 128 129 } 130 val[jj++] = *bv++; 131 } 132 /* A-part */ 133 for (j=0; j<countA; j++){ 134 if (!valOnly){ 135 row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift; 136 } 137 val[jj++] = *av++; 138 } 139 /* B-part, larger col index */ 140 for (j=jB; j<countB; j++){ 141 if (!valOnly){ 142 row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift; 143 } 144 val[jj++] = *bv++; 145 } 146 irow++; 147 } 148 149 PetscFunctionReturn(0); 150 } 151 152 EXTERN_C_BEGIN 153 #undef __FUNCT__ 154 #define __FUNCT__ "MatConvert_MUMPS_Base" 155 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MUMPS_Base(Mat A,MatType type,MatReuse reuse,Mat *newmat) \ 156 { 157 PetscErrorCode ierr; 158 Mat B=*newmat; 159 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 160 void (*f)(void); 161 162 PetscFunctionBegin; 163 if (reuse == MAT_INITIAL_MATRIX) { 164 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 165 } 166 B->ops->duplicate = mumps->MatDuplicate; 167 B->ops->view = mumps->MatView; 168 B->ops->assemblyend = mumps->MatAssemblyEnd; 169 B->ops->lufactorsymbolic = mumps->MatLUFactorSymbolic; 170 B->ops->choleskyfactorsymbolic = mumps->MatCholeskyFactorSymbolic; 171 B->ops->destroy = mumps->MatDestroy; 172 173 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(PetscVoidStarFunction)&f);CHKERRQ(ierr); 174 if (f) { 175 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C","",(PetscVoidFunction)mumps->MatPreallocate);CHKERRQ(ierr); 176 } 177 178 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_aijmumps_C","",PETSC_NULL);CHKERRQ(ierr); 179 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_aijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
  *newmat = B;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MUMPS"
/* Destroy an aijmumps/sbaijmumps matrix: terminate the MUMPS instance (if one
   was created), free the triple arrays and the duplicated communicator, then
   run the type-specific destroy followed by the base type's destroy. */
PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS      *lu=(Mat_MUMPS*)A->spptr;
  PetscErrorCode ierr;
  PetscMPIInt    size=lu->size;
  PetscErrorCode (*specialdestroy)(Mat);
  PetscFunctionBegin;
  if (lu->CleanUpMUMPS) {
    /* Terminate instance, deallocate memories */
    lu->id.job=JOB_END;
#if defined(PETSC_USE_COMPLEX)
    zmumps_c(&lu->id);
#else
    dmumps_c(&lu->id);
#endif
    ierr = PetscFree(lu->irn);CHKERRQ(ierr);
    ierr = PetscFree(lu->jcn);CHKERRQ(ierr);
    /* lu->val is owned here only in the distributed (size>1) triple case;
       for size==1 it aliases the base matrix's value array */
    if (size>1 && lu->val) {
      ierr = PetscFree(lu->val);CHKERRQ(ierr);
    }
    ierr = MPI_Comm_free(&(lu->comm_mumps));CHKERRQ(ierr);
  }
  /* specialdestroy converts A back to its base type (and resets A->ops->destroy),
     so grab the pointer first, then chain to the restored destroy */
  specialdestroy = lu->specialdestroy;
  ierr = (*specialdestroy)(A);CHKERRQ(ierr);
  ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_AIJMUMPS"
/* AIJ-flavor helper for MatDestroy_MUMPS: convert back to SEQAIJ or MPIAIJ in place */
PetscErrorCode MatDestroy_AIJMUMPS(Mat A)
{
  PetscErrorCode ierr;
  int            size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size==1) {
    ierr = MatConvert_MUMPS_Base(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
  } else {
    ierr = MatConvert_MUMPS_Base(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_SBAIJMUMPS"
/* SBAIJ-flavor helper for MatDestroy_MUMPS: convert back to SEQSBAIJ or MPISBAIJ in place */
PetscErrorCode MatDestroy_SBAIJMUMPS(Mat A)
{
  PetscErrorCode ierr;
  int            size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size==1) {
    ierr = MatConvert_MUMPS_Base(A,MATSEQSBAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
  } else {
    ierr = MatConvert_MUMPS_Base(A,MATMPISBAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFactorInfo_MUMPS"
/* Print the MUMPS control parameters and (on the host) the global statistics
   gathered by the last analysis/factorization; used by MatView for -ksp_view */
PetscErrorCode MatFactorInfo_MUMPS(Mat A,PetscViewer viewer) {
  Mat_MUMPS      *lu=(Mat_MUMPS*)A->spptr;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",lu->id.sym);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",lu->id.par);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",lu->id.ICNTL(4));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",lu->id.ICNTL(5));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",lu->id.ICNTL(6));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (matrix ordering): %d \n",lu->id.ICNTL(7));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(9) (A/A^T x=b is solved): %d \n",lu->id.ICNTL(9));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",lu->id.ICNTL(10));CHKERRQ(ierr);
  ierr = 
PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",lu->id.ICNTL(11));CHKERRQ(ierr); 274 if (!lu->myid && lu->id.ICNTL(11)>0) { 275 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(4) (inf norm of input mat): %g\n",lu->id.RINFOG(4));CHKERRQ(ierr); 276 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(5) (inf norm of solution): %g\n",lu->id.RINFOG(5));CHKERRQ(ierr); 277 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(6) (inf norm of residual): %g\n",lu->id.RINFOG(6));CHKERRQ(ierr); 278 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",lu->id.RINFOG(7),lu->id.RINFOG(8));CHKERRQ(ierr); 279 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(9) (error estimate): %g \n",lu->id.RINFOG(9));CHKERRQ(ierr); 280 ierr = PetscPrintf(PETSC_COMM_SELF," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",lu->id.RINFOG(10),lu->id.RINFOG(11));CHKERRQ(ierr); 281 282 } 283 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",lu->id.ICNTL(12));CHKERRQ(ierr); 284 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (efficiency control): %d \n",lu->id.ICNTL(13));CHKERRQ(ierr); 285 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",lu->id.ICNTL(14));CHKERRQ(ierr); 286 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(15) (efficiency control): %d \n",lu->id.ICNTL(15));CHKERRQ(ierr); 287 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",lu->id.ICNTL(18));CHKERRQ(ierr); 288 289 ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",lu->id.CNTL(1));CHKERRQ(ierr); 290 ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",lu->id.CNTL(2));CHKERRQ(ierr); 291 ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",lu->id.CNTL(3));CHKERRQ(ierr); 292 293 /* infomation local to each processor */ 294 if (!lu->myid) ierr = PetscPrintf(PETSC_COMM_SELF, " RINFO(1) (local estimated flops 
for the elimination after analysis): \n");CHKERRQ(ierr); 295 ierr = PetscSynchronizedPrintf(A->comm," [%d] %g \n",lu->myid,lu->id.RINFO(1));CHKERRQ(ierr); 296 ierr = PetscSynchronizedFlush(A->comm); 297 if (!lu->myid) ierr = PetscPrintf(PETSC_COMM_SELF, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr); 298 ierr = PetscSynchronizedPrintf(A->comm," [%d] %g \n",lu->myid,lu->id.RINFO(2));CHKERRQ(ierr); 299 ierr = PetscSynchronizedFlush(A->comm); 300 if (!lu->myid) ierr = PetscPrintf(PETSC_COMM_SELF, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr); 301 ierr = PetscSynchronizedPrintf(A->comm," [%d] %g \n",lu->myid,lu->id.RINFO(3));CHKERRQ(ierr); 302 ierr = PetscSynchronizedFlush(A->comm); 303 304 if (!lu->myid){ /* information from the host */ 305 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",lu->id.RINFOG(1));CHKERRQ(ierr); 306 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",lu->id.RINFOG(2));CHKERRQ(ierr); 307 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",lu->id.RINFOG(3));CHKERRQ(ierr); 308 309 ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(3));CHKERRQ(ierr); 310 ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(4));CHKERRQ(ierr); 311 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",lu->id.INFOG(5));CHKERRQ(ierr); 312 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",lu->id.INFOG(6));CHKERRQ(ierr); 313 ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively 
uese after analysis): %d \n",lu->id.INFOG(7));CHKERRQ(ierr); 314 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",lu->id.INFOG(8));CHKERRQ(ierr); 315 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real space store the matrix factors after analysis): %d \n",lu->id.INFOG(9));CHKERRQ(ierr); 316 ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after analysis): %d \n",lu->id.INFOG(10));CHKERRQ(ierr); 317 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix): %d \n",lu->id.INFOG(11));CHKERRQ(ierr); 318 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",lu->id.INFOG(12));CHKERRQ(ierr); 319 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",lu->id.INFOG(13));CHKERRQ(ierr); 320 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",lu->id.INFOG(14));CHKERRQ(ierr); 321 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",lu->id.INFOG(15));CHKERRQ(ierr); 322 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in million of bytes) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",lu->id.INFOG(16));CHKERRQ(ierr); 323 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",lu->id.INFOG(17));CHKERRQ(ierr); 324 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",lu->id.INFOG(18));CHKERRQ(ierr); 325 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all 
processors): %d \n",lu->id.INFOG(19));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",lu->id.INFOG(20));CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MUMPS"
/* View as the base type, then append the MUMPS factor info when the viewer
   format is PETSC_VIEWER_ASCII_FACTOR_INFO (e.g. under -ksp_view) */
PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) {
  PetscErrorCode    ierr;
  PetscTruth        iascii;
  PetscViewerFormat format;
  Mat_MUMPS         *mumps=(Mat_MUMPS*)(A->spptr);

  PetscFunctionBegin;
  ierr = (*mumps->MatView)(A,viewer);CHKERRQ(ierr);

  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      ierr = MatFactorInfo_MUMPS(A,viewer);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSolve_AIJMUMPS"
/* Solve A x = b with the MUMPS factors (job=3). MUMPS expects a centralized
   rhs on the host, so in parallel b is gathered into a host-resident
   sequential vector first and the solution scattered back afterwards. */
PetscErrorCode MatSolve_AIJMUMPS(Mat A,Vec b,Vec x) {
  Mat_MUMPS      *lu=(Mat_MUMPS*)A->spptr;
  PetscScalar    *array;
  Vec            x_seq;     /* only created/used when lu->size > 1 */
  IS             iden;
  VecScatter     scat;      /* only created/used when lu->size > 1 */
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (lu->size > 1){
    /* gather the entire rhs onto the host (rank 0); other ranks get length-0 pieces */
    if (!lu->myid){
      ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap.N,&x_seq);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,A->rmap.N,0,1,&iden);CHKERRQ(ierr);
    } else {
      ierr = VecCreateSeq(PETSC_COMM_SELF,0,&x_seq);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&iden);CHKERRQ(ierr);
    }
    ierr = VecScatterCreate(b,iden,x_seq,iden,&scat);CHKERRQ(ierr);
    ierr = ISDestroy(iden);CHKERRQ(ierr);

    ierr = VecScatterBegin(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
    ierr = VecScatterEnd(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
    if (!lu->myid) {ierr = VecGetArray(x_seq,&array);CHKERRQ(ierr);}
  } else {  /* size == 1 */
    /* MUMPS solves in place in the rhs array, so copy b into x and hand x to MUMPS */
    ierr = VecCopy(b,x);CHKERRQ(ierr);
    ierr = VecGetArray(x,&array);CHKERRQ(ierr);
  }
  if (!lu->myid) { /* define rhs on the host */
#if defined(PETSC_USE_COMPLEX)
    lu->id.rhs = (mumps_double_complex*)array;
#else
    lu->id.rhs = array;
#endif
  }

  /* solve phase */
  lu->id.job=3;
#if defined(PETSC_USE_COMPLEX)
  zmumps_c(&lu->id);
#else
  dmumps_c(&lu->id);
#endif
  if (lu->id.INFOG(1) < 0) {
    SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",lu->id.INFOG(1));
  }

  /* convert mumps solution x_seq to petsc mpi x */
  if (lu->size > 1) {
    if (!lu->myid){
      ierr = VecRestoreArray(x_seq,&array);CHKERRQ(ierr);
    }
    ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
    ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
    ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
    ierr = VecDestroy(x_seq);CHKERRQ(ierr);
  } else {
    ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

/*
  input:
   F:        numeric factor
  output:
   nneg:     total number of negative pivots
   nzero:    0
   npos:     (global dimension of F) - nneg
*/

#undef __FUNCT__
#define __FUNCT__ "MatGetInertia_SBAIJMUMPS"
PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos)
{
  Mat_MUMPS      *lu =(Mat_MUMPS*)F->spptr;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(F->comm,&size);CHKERRQ(ierr);
  /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
  if (size > 1 && lu->id.ICNTL(13) != 1){
    SETERRQ1(PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. 
-mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",lu->id.INFOG(13)); 439 } 440 if (nneg){ 441 if (!lu->myid){ 442 *nneg = lu->id.INFOG(12); 443 } 444 ierr = MPI_Bcast(nneg,1,MPI_INT,0,lu->comm_mumps);CHKERRQ(ierr); 445 } 446 if (nzero) *nzero = 0; 447 if (npos) *npos = F->rmap.N - (*nneg); 448 PetscFunctionReturn(0); 449 } 450 451 #undef __FUNCT__ 452 #define __FUNCT__ "MatFactorNumeric_AIJMUMPS" 453 PetscErrorCode MatFactorNumeric_AIJMUMPS(Mat A,MatFactorInfo *info,Mat *F) 454 { 455 Mat_MUMPS *lu =(Mat_MUMPS*)(*F)->spptr; 456 Mat_MUMPS *lua=(Mat_MUMPS*)(A)->spptr; 457 PetscErrorCode ierr; 458 PetscInt rnz,nnz,nz=0,i,M=A->rmap.N,*ai,*aj,icntl; 459 PetscTruth valOnly,flg; 460 Mat F_diag; 461 462 PetscFunctionBegin; 463 if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ 464 (*F)->ops->solve = MatSolve_AIJMUMPS; 465 466 /* Initialize a MUMPS instance */ 467 ierr = MPI_Comm_rank(A->comm, &lu->myid); 468 ierr = MPI_Comm_size(A->comm,&lu->size);CHKERRQ(ierr); 469 lua->myid = lu->myid; lua->size = lu->size; 470 lu->id.job = JOB_INIT; 471 ierr = MPI_Comm_dup(A->comm,&(lu->comm_mumps));CHKERRQ(ierr); 472 ierr = MPICCommToFortranComm(lu->comm_mumps,&(lu->id.comm_fortran));CHKERRQ(ierr); 473 474 /* Set mumps options */ 475 ierr = PetscOptionsBegin(A->comm,A->prefix,"MUMPS Options","Mat");CHKERRQ(ierr); 476 lu->id.par=1; /* host participates factorizaton and solve */ 477 lu->id.sym=lu->sym; 478 if (lu->sym == 2){ 479 ierr = PetscOptionsInt("-mat_mumps_sym","SYM: (1,2)","None",lu->id.sym,&icntl,&flg);CHKERRQ(ierr); 480 if (flg && icntl == 1) lu->id.sym=icntl; /* matrix is spd */ 481 } 482 #if defined(PETSC_USE_COMPLEX) 483 zmumps_c(&lu->id); 484 #else 485 dmumps_c(&lu->id); 486 #endif 487 488 if (lu->size == 1){ 489 lu->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 490 } else { 491 lu->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 492 } 493 494 icntl=-1; 495 ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing 
(0 to 4)","None",lu->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr);
    if ((flg && icntl > 0) || PetscLogPrintInfo) {
      lu->id.ICNTL(4)=icntl; /* and use mumps default icntl(i), i=1,2,3 */
    } else { /* no output */
      lu->id.ICNTL(1) = 0;  /* error message, default= 6 */
      lu->id.ICNTL(2) = -1; /* output stream for diagnostic printing, statistics, and warning. default=0 */
      lu->id.ICNTL(3) = -1; /* output stream for global information, default=6 */
      lu->id.ICNTL(4) = 0;  /* level of printing, 0,1,2,3,4, default=2 */
    }
    ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): matrix prescaling (0 to 7)","None",lu->id.ICNTL(6),&lu->id.ICNTL(6),PETSC_NULL);CHKERRQ(ierr);
    icntl=-1;
    ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): matrix ordering (0 to 7)","None",lu->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr);
    if (flg) {
      if (icntl== 1){
        /* ICNTL(7)=1 requires a user permutation in PERM_IN, which this interface does not supply */
        SETERRQ(PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n");
      } else {
        lu->id.ICNTL(7) = icntl;
      }
    }
    ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): A or A^T x=b to be solved. 1: A; otherwise: A^T","None",lu->id.ICNTL(9),&lu->id.ICNTL(9),PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",lu->id.ICNTL(10),&lu->id.ICNTL(10),PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): error analysis, a positive value returns statistics (by -ksp_view)","None",lu->id.ICNTL(11),&lu->id.ICNTL(11),PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): efficiency control","None",lu->id.ICNTL(12),&lu->id.ICNTL(12),PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): efficiency control","None",lu->id.ICNTL(13),&lu->id.ICNTL(13),PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage of estimated workspace increase","None",lu->id.ICNTL(14),&lu->id.ICNTL(14),PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_mumps_icntl_15","ICNTL(15): efficiency control","None",lu->id.ICNTL(15),&lu->id.ICNTL(15),PETSC_NULL);CHKERRQ(ierr);

    ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",lu->id.CNTL(1),&lu->id.CNTL(1),PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",lu->id.CNTL(2),&lu->id.CNTL(2),PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",lu->id.CNTL(3),&lu->id.CNTL(3),PETSC_NULL);CHKERRQ(ierr);
    PetscOptionsEnd();
  }

  /* define matrix A */
  switch (lu->id.ICNTL(18)){
  case 0:  /* centralized assembled matrix input (size=1) */
    if (!lu->myid) {
      /* point lu->val at the base matrix's value array (no copy) */
      if (lua->isAIJ){
        Mat_SeqAIJ *aa = (Mat_SeqAIJ*)A->data;
        nz = aa->nz;
        ai = aa->i; aj = aa->j; lu->val = aa->a;
      } else {
        Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)A->data;
        nz = aa->nz;
        ai = aa->i; aj = aa->j; lu->val = aa->a;
      }
      if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization, get irn and jcn */
        ierr = PetscMalloc(nz*sizeof(PetscInt),&lu->irn);CHKERRQ(ierr);
        ierr = PetscMalloc(nz*sizeof(PetscInt),&lu->jcn);CHKERRQ(ierr);
        nz = 0;
        for (i=0; i<M; i++){
          rnz = ai[i+1] - ai[i];
          while (rnz--) {  /* Fortran row/col index! */
            lu->irn[nz] = i+1; lu->jcn[nz] = (*aj)+1; aj++; nz++;
          }
        }
      }
    }
    break;
  case 3:  /* distributed assembled matrix input (size>1) */
    if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
      valOnly = PETSC_FALSE;
    } else {
      valOnly = PETSC_TRUE; /* only update mat values, not row and col index */
    }
    ierr = MatConvertToTriples(A,1,valOnly, &nnz, &lu->irn, &lu->jcn, &lu->val);CHKERRQ(ierr);
    break;
  default: SETERRQ(PETSC_ERR_SUP,"Matrix input format is not supported by MUMPS.");
  }

  /* analysis phase (job=1), only needed for a new nonzero pattern */
  if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
    lu->id.n = M;
    switch (lu->id.ICNTL(18)){
    case 0:  /* centralized assembled matrix input */
      if (!lu->myid) {
        lu->id.nz =nz; lu->id.irn=lu->irn; lu->id.jcn=lu->jcn;
        if (lu->id.ICNTL(6)>1){
          /* prescaling options >1 need the values already during analysis */
#if defined(PETSC_USE_COMPLEX)
          lu->id.a = (mumps_double_complex*)lu->val;
#else
          lu->id.a = lu->val;
#endif
        }
      }
      break;
    case 3:  /* distributed assembled matrix input (size>1) */
      lu->id.nz_loc = nnz;
      lu->id.irn_loc=lu->irn; lu->id.jcn_loc=lu->jcn;
      if (lu->id.ICNTL(6)>1) {
#if defined(PETSC_USE_COMPLEX)
        lu->id.a_loc = (mumps_double_complex*)lu->val;
#else
        lu->id.a_loc = lu->val;
#endif
      }
      break;
    }
    lu->id.job=1;
#if defined(PETSC_USE_COMPLEX)
    zmumps_c(&lu->id);
#else
    dmumps_c(&lu->id);
#endif
    if (lu->id.INFOG(1) < 0) {
      SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",lu->id.INFOG(1));
    }
  }

  /* numerical factorization phase (job=2) */
  if(!lu->id.ICNTL(18)) {
    if (!lu->myid) {
#if defined(PETSC_USE_COMPLEX)
      lu->id.a = (mumps_double_complex*)lu->val;
#else
      lu->id.a = lu->val;
#endif
    }
  } else {
#if defined(PETSC_USE_COMPLEX)
    lu->id.a_loc = (mumps_double_complex*)lu->val;
#else
    lu->id.a_loc = lu->val;
#endif
  }
  lu->id.job=2;
#if defined(PETSC_USE_COMPLEX)
  zmumps_c(&lu->id);
#else
  dmumps_c(&lu->id);
#endif
  if (lu->id.INFOG(1) < 0) {
    if (lu->id.INFO(1) == -13) {
      SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: Cannot allocate required memory %d megabytes\n",lu->id.INFO(2));
    } else {
      SETERRQ2(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFO(1)=%d, INFO(2)=%d\n",lu->id.INFO(1),lu->id.INFO(2));
    }
  }

  /* NOTE(review): condition tests ICNTL(16) but the message prints INFOG(16);
     one of the two is likely intended for both -- verify against the MUMPS
     4.x Users' Guide before changing */
  if (!lu->myid && lu->id.ICNTL(16) > 0){
    SETERRQ1(PETSC_ERR_LIB," lu->id.ICNTL(16):=%d\n",lu->id.INFOG(16));
  }

  /* mark the factor (and in parallel its diagonal block) assembled so it can be used */
  if (lu->size > 1){
    if ((*F)->factor == FACTOR_LU){
      F_diag = ((Mat_MPIAIJ *)(*F)->data)->A;
    } else {
      F_diag = ((Mat_MPISBAIJ *)(*F)->data)->A;
    }
    F_diag->assembled = PETSC_TRUE;
  }
  (*F)->assembled  = PETSC_TRUE;
  lu->matstruc     = SAME_NONZERO_PATTERN;
  lu->CleanUpMUMPS = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/* Note the Petsc r and c permutations are ignored */
#undef __FUNCT__
#define __FUNCT__ "MatLUFactorSymbolic_AIJMUMPS"
/* Create the factor matrix F; the actual symbolic work is deferred to the
   MUMPS analysis phase inside MatFactorNumeric_AIJMUMPS */
PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) {
  Mat            B;
  Mat_MUMPS      *lu;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);

  B->ops->lufactornumeric = 
MatFactorNumeric_AIJMUMPS;
  B->factor    = FACTOR_LU;
  lu           = (Mat_MUMPS*)B->spptr;
  lu->sym      = 0;                          /* unsymmetric LU */
  lu->matstruc = DIFFERENT_NONZERO_PATTERN;  /* force analysis on first numeric call */

  *F = B;
  PetscFunctionReturn(0);
}

/* Note the Petsc r permutation is ignored */
#undef __FUNCT__
#define __FUNCT__ "MatCholeskyFactorSymbolic_SBAIJMUMPS"
/* Create the Cholesky factor matrix F (bs=1 SBAIJ); symbolic work is deferred
   to the MUMPS analysis phase inside the shared MatFactorNumeric_AIJMUMPS */
PetscErrorCode MatCholeskyFactorSymbolic_SBAIJMUMPS(Mat A,IS r,MatFactorInfo *info,Mat *F) {
  Mat            B;
  Mat_MUMPS      *lu;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocation(B,1,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);

  /* LU and Cholesky share the same MUMPS numeric driver; sym=2 selects symmetric mode */
  B->ops->choleskyfactornumeric = MatFactorNumeric_AIJMUMPS;
  B->ops->getinertia            = MatGetInertia_SBAIJMUMPS;
  B->factor                     = FACTOR_CHOLESKY;
  lu                            = (Mat_MUMPS*)B->spptr;
  lu->sym                       = 2;
  lu->matstruc                  = DIFFERENT_NONZERO_PATTERN;

  *F = B;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_AIJMUMPS"
/* After base assembly, re-route the symbolic factorization to the MUMPS version
   (assembly of the base type may have reset the ops table) */
PetscErrorCode MatAssemblyEnd_AIJMUMPS(Mat A,MatAssemblyType mode) {
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->spptr;

  PetscFunctionBegin;
  ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);

  mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
  mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
  A->ops->lufactorsymbolic         = MatLUFactorSymbolic_AIJMUMPS;
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatConvert_AIJ_AIJMUMPS"
/* Convert a (Seq/MPI)AIJ matrix to type aijmumps: save the base methods in a
   Mat_MUMPS context and install the MUMPS overrides */
PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_AIJ_AIJMUMPS(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;
  MPI_Comm       comm;
Mat B=*newmat; 729 Mat_MUMPS *mumps; 730 731 PetscFunctionBegin; 732 if (reuse == MAT_INITIAL_MATRIX) { 733 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 734 } 735 736 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 737 ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr); 738 739 mumps->MatDuplicate = A->ops->duplicate; 740 mumps->MatView = A->ops->view; 741 mumps->MatAssemblyEnd = A->ops->assemblyend; 742 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 743 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 744 mumps->MatDestroy = A->ops->destroy; 745 mumps->specialdestroy = MatDestroy_AIJMUMPS; 746 mumps->CleanUpMUMPS = PETSC_FALSE; 747 mumps->isAIJ = PETSC_TRUE; 748 749 B->spptr = (void*)mumps; 750 B->ops->duplicate = MatDuplicate_MUMPS; 751 B->ops->view = MatView_MUMPS; 752 B->ops->assemblyend = MatAssemblyEnd_AIJMUMPS; 753 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 754 B->ops->destroy = MatDestroy_MUMPS; 755 756 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 757 if (size == 1) { 758 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_aijmumps_C", 759 "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr); 760 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_seqaij_C", 761 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 762 } else { 763 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_aijmumps_C", 764 "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr); 765 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_mpiaij_C", 766 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 767 } 768 769 ierr = PetscInfo(0,"Using MUMPS for LU factorization and solves.\n");CHKERRQ(ierr); 770 ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr); 771 *newmat = B; 772 PetscFunctionReturn(0); 773 } 774 EXTERN_C_END 775 776 /*MC 777 MATAIJMUMPS - 
MATAIJMUMPS = "aijmumps" - A matrix type providing direct solvers (LU) for distributed
  and sequential matrices via the external package MUMPS.

  If MUMPS is installed (see the manual for instructions
  on how to declare the existence of external packages),
  a matrix type can be constructed which invokes MUMPS solvers.
  After calling MatCreate(...,A), simply call MatSetType(A,MATAIJMUMPS).

  If created with a single-process communicator, this matrix type inherits from MATSEQAIJ.
  Otherwise, this matrix type inherits from MATMPIAIJ. Hence for single-process communicators,
  MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
  for communicators controlling multiple processes. It is recommended that you call both of
  the above preallocation routines for simplicity. One can also call MatConvert for an in-place
  conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
  without data copy.

  Options Database Keys:
+ -mat_type aijmumps - sets the matrix type to "aijmumps" during a call to MatSetFromOptions()
. -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric
. -mat_mumps_icntl_4 <0,1,2,3,4> - print level
. -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide)
. -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide)
. -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T
. -mat_mumps_icntl_10 <n> - maximum number of iterative refinements
. -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view
. -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide)
. -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide)
. -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide)
. 
-mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide) 806 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold 807 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement 808 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold 809 810 Level: beginner 811 812 .seealso: MATSBAIJMUMPS 813 M*/ 814 815 EXTERN_C_BEGIN 816 #undef __FUNCT__ 817 #define __FUNCT__ "MatCreate_AIJMUMPS" 818 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_AIJMUMPS(Mat A) 819 { 820 PetscErrorCode ierr; 821 PetscMPIInt size; 822 MPI_Comm comm; 823 824 PetscFunctionBegin; 825 /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */ 826 /* and AIJMUMPS types */ 827 ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJMUMPS);CHKERRQ(ierr); 828 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 829 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 830 if (size == 1) { 831 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 832 } else { 833 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 834 /* 835 Mat A_diag = ((Mat_MPIAIJ *)A->data)->A; 836 ierr = MatConvert_AIJ_AIJMUMPS(A_diag,MATAIJMUMPS,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr); 837 */ 838 } 839 ierr = MatConvert_AIJ_AIJMUMPS(A,MATAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 840 PetscFunctionReturn(0); 841 } 842 EXTERN_C_END 843 844 #undef __FUNCT__ 845 #define __FUNCT__ "MatAssemblyEnd_SBAIJMUMPS" 846 PetscErrorCode MatAssemblyEnd_SBAIJMUMPS(Mat A,MatAssemblyType mode) 847 { 848 PetscErrorCode ierr; 849 Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr; 850 851 PetscFunctionBegin; 852 ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 853 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 854 A->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SBAIJMUMPS; 855 PetscFunctionReturn(0); 856 } 857 858 EXTERN_C_BEGIN 859 #undef __FUNCT__ 860 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS" 861 PetscErrorCode PETSCMAT_DLLEXPORT 
MatMPISBAIJSetPreallocation_MPISBAIJMUMPS(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 862 { 863 Mat A; 864 Mat_MUMPS *mumps=(Mat_MUMPS*)B->spptr; 865 PetscErrorCode ierr; 866 867 PetscFunctionBegin; 868 /* 869 After performing the MPISBAIJ Preallocation, we need to convert the local diagonal block matrix 870 into MUMPS type so that the block jacobi preconditioner (for example) can use MUMPS. I would 871 like this to be done in the MatCreate routine, but the creation of this inner matrix requires 872 block size info so that PETSc can determine the local size properly. The block size info is set 873 in the preallocation routine. 874 */ 875 ierr = (*mumps->MatPreallocate)(B,bs,d_nz,d_nnz,o_nz,o_nnz); 876 A = ((Mat_MPISBAIJ *)B->data)->A; 877 ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 878 PetscFunctionReturn(0); 879 } 880 EXTERN_C_END 881 882 EXTERN_C_BEGIN 883 #undef __FUNCT__ 884 #define __FUNCT__ "MatConvert_SBAIJ_SBAIJMUMPS" 885 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SBAIJ_SBAIJMUMPS(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 886 { 887 PetscErrorCode ierr; 888 PetscMPIInt size; 889 MPI_Comm comm; 890 Mat B=*newmat; 891 Mat_MUMPS *mumps; 892 void (*f)(void); 893 894 PetscFunctionBegin; 895 if (reuse == MAT_INITIAL_MATRIX) { 896 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 897 } 898 899 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 900 ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr); 901 902 mumps->MatDuplicate = A->ops->duplicate; 903 mumps->MatView = A->ops->view; 904 mumps->MatAssemblyEnd = A->ops->assemblyend; 905 mumps->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 906 mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic; 907 mumps->MatDestroy = A->ops->destroy; 908 mumps->specialdestroy = MatDestroy_SBAIJMUMPS; 909 mumps->CleanUpMUMPS = PETSC_FALSE; 910 mumps->isAIJ = PETSC_FALSE; 911 912 B->spptr = (void*)mumps; 913 
B->ops->duplicate = MatDuplicate_MUMPS; 914 B->ops->view = MatView_MUMPS; 915 B->ops->assemblyend = MatAssemblyEnd_SBAIJMUMPS; 916 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SBAIJMUMPS; 917 B->ops->destroy = MatDestroy_MUMPS; 918 919 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 920 if (size == 1) { 921 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C", 922 "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr); 923 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C", 924 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 925 } else { 926 /* I really don't like needing to know the tag: MatMPISBAIJSetPreallocation_C */ 927 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(PetscVoidStarFunction)&f);CHKERRQ(ierr); 928 if (f) { /* This case should always be true when this routine is called */ 929 mumps->MatPreallocate = (PetscErrorCode (*)(Mat,int,int,int*,int,int*))f; 930 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C", 931 "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS", 932 MatMPISBAIJSetPreallocation_MPISBAIJMUMPS);CHKERRQ(ierr); 933 } 934 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C", 935 "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr); 936 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C", 937 "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr); 938 } 939 940 ierr = PetscInfo(0,"Using MUMPS for Cholesky factorization and solves.\n");CHKERRQ(ierr); 941 ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr); 942 *newmat = B; 943 PetscFunctionReturn(0); 944 } 945 EXTERN_C_END 946 947 #undef __FUNCT__ 948 #define __FUNCT__ "MatDuplicate_MUMPS" 949 PetscErrorCode MatDuplicate_MUMPS(Mat A, MatDuplicateOption op, Mat *M) { 950 
PetscErrorCode ierr;
  Mat_MUMPS      *lu=(Mat_MUMPS *)A->spptr;

  PetscFunctionBegin;
  /* duplicate via the saved base-type routine, then copy this matrix's
     Mat_MUMPS context verbatim into the duplicate's spptr (shallow copy of
     the stashed method pointers and flags; NOTE(review): any MUMPS internal
     state in lu->id appears to be shared by this memcpy — confirm the
     duplicate is only factored from scratch) */
  ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr);
  ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_MUMPS));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
  MATSBAIJMUMPS - MATSBAIJMUMPS = "sbaijmumps" - A symmetric matrix type providing direct solvers (Cholesky) for
  distributed and sequential matrices via the external package MUMPS.

  If MUMPS is installed (see the manual for instructions
  on how to declare the existence of external packages),
  a matrix type can be constructed which invokes MUMPS solvers.
  After calling MatCreate(...,A), simply call MatSetType(A,MATSBAIJMUMPS).

  If created with a single process communicator, this matrix type inherits from MATSEQSBAIJ.
  Otherwise, this matrix type inherits from MATMPISBAIJ. Hence for single process communicators,
  MatSeqSBAIJSetPreallocation is supported, and similarly MatMPISBAIJSetPreallocation is supported
  for communicators controlling multiple processes. It is recommended that you call both of
  the above preallocation routines for simplicity. One can also call MatConvert for an inplace
  conversion to or from the MATSEQSBAIJ or MATMPISBAIJ type (depending on the communicator size)
  without data copy.

  Options Database Keys:
+ -mat_type sbaijmumps - sets the matrix type to "sbaijmumps" during a call to MatSetFromOptions()
. -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric
. -mat_mumps_icntl_4 <0,...,4> - print level
. -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide)
. -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide)
. -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T
. -mat_mumps_icntl_10 <n> - maximum number of iterative refinements
. 
-mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view 985 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide) 986 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide) 987 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide) 988 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide) 989 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold 990 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement 991 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold 992 993 Level: beginner 994 995 .seealso: MATAIJMUMPS 996 M*/ 997 998 EXTERN_C_BEGIN 999 #undef __FUNCT__ 1000 #define __FUNCT__ "MatCreate_SBAIJMUMPS" 1001 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJMUMPS(Mat A) 1002 { 1003 PetscErrorCode ierr; 1004 PetscMPIInt size; 1005 1006 PetscFunctionBegin; 1007 /* Change type name before calling MatSetType to force proper construction of SeqSBAIJ or MPISBAIJ */ 1008 /* and SBAIJMUMPS types */ 1009 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJMUMPS);CHKERRQ(ierr); 1010 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 1011 if (size == 1) { 1012 ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr); 1013 } else { 1014 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 1015 } 1016 ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr); 1017 PetscFunctionReturn(0); 1018 } 1019 EXTERN_C_END 1020